logstash-input-file 4.1.2 → 4.1.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/docs/index.asciidoc +68 -20
- data/lib/filewatch/observing_read.rb +0 -1
- data/lib/filewatch/read_mode/handlers/base.rb +6 -1
- data/lib/filewatch/read_mode/handlers/read_file.rb +2 -3
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +7 -6
- data/lib/filewatch/read_mode/processor.rb +4 -2
- data/lib/filewatch/sincedb_collection.rb +22 -10
- data/lib/filewatch/watch.rb +1 -0
- data/lib/jars/filewatch-1.0.0.jar +0 -0
- data/lib/logstash/inputs/file.rb +21 -8
- data/lib/logstash/inputs/friendly_durations.rb +45 -0
- data/logstash-input-file.gemspec +1 -1
- data/spec/filewatch/read_mode_handlers_read_file_spec.rb +40 -0
- data/spec/filewatch/spec_helper.rb +26 -0
- data/spec/filewatch/tailing_spec.rb +1 -1
- data/spec/inputs/friendly_durations_spec.rb +71 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 44f11a07375e6cf964220179c9cea4259a9bb6829dbbdda85e05fef737a9e214
+  data.tar.gz: 9005ada3317a3d947bce138f9bc014647f79ad871db7cfd373642f51f94ca8bc
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 772fedf54c74b08d660f2a6aca1dd43f9ab04cb9290ef67d409e26419931bcf0fa26de027c075d561f35f40d55ac8b43c376c7b7c88f1f7982f1eb243361c7a3
+  data.tar.gz: dc5dc4aa91b870368967f92fc4129faa4e2b470e0bd942dda965711fab90def3658e9efd8f9158f3f98193a5aedcf23322bf65c7f5a28a4c32b84d6aac370118
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
+## 4.1.3
+  - Fixed `read` mode of regular files: a sincedb write is requested in each read loop
+    iteration rather than waiting for the end-of-file to be reached. Note: for gz files,
+    the sincedb entry can only be updated at the end of the file as it is not possible
+    to seek into a compressed file and begin reading from that position.
+    [#196](https://github.com/logstash-plugins/logstash-input-file/pull/196)
+  - Added support for String Durations in some settings, e.g. `stat_interval => "750 ms"`
+    [#194](https://github.com/logstash-plugins/logstash-input-file/pull/194)
+
 ## 4.1.2
   - Fix `require winhelper` error in WINDOWS.
     [Issue #184](https://github.com/logstash-plugins/logstash-input-file/issues/184)
data/docs/index.asciidoc
CHANGED
@@ -146,10 +146,15 @@ will not get picked up.
 
 This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
 
+[NOTE]
+Duration settings can be specified in text form, e.g. "250 ms"; this string will be converted into
+decimal seconds. There are quite a few supported natural and abbreviated durations,
+see <<string_duration,string duration>> for the details.
+
 [cols="<,<,<",options="header",]
 |=======================================================================
 |Setting |Input type|Required
-| <<plugins-{type}s-{plugin}-close_older>> |<<number,number>>|No
+| <<plugins-{type}s-{plugin}-close_older>> |<<number,number>> or <<string_duration,string duration>>|No
 | <<plugins-{type}s-{plugin}-delimiter>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-discover_interval>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-exclude>> |<<array,array>>|No
@@ -159,15 +164,15 @@ This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
 | <<plugins-{type}s-{plugin}-file_completed_log_path>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-file_sort_by>> |<<string,string>>, one of `["last_modified", "path"]`|No
 | <<plugins-{type}s-{plugin}-file_sort_direction>> |<<string,string>>, one of `["asc", "desc"]`|No
-| <<plugins-{type}s-{plugin}-ignore_older>> |<<number,number>>|No
+| <<plugins-{type}s-{plugin}-ignore_older>> |<<number,number>> or <<string_duration,string duration>>|No
 | <<plugins-{type}s-{plugin}-max_open_files>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-mode>> |<<string,string>>, one of `["tail", "read"]`|No
 | <<plugins-{type}s-{plugin}-path>> |<<array,array>>|Yes
-| <<plugins-{type}s-{plugin}-sincedb_clean_after>> |<<number,number>>|No
+| <<plugins-{type}s-{plugin}-sincedb_clean_after>> |<<number,number>> or <<string_duration,string duration>>|No
 | <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
-| <<plugins-{type}s-{plugin}-sincedb_write_interval>> |<<number,number>>|No
+| <<plugins-{type}s-{plugin}-sincedb_write_interval>> |<<number,number>> or <<string_duration,string duration>>|No
 | <<plugins-{type}s-{plugin}-start_position>> |<<string,string>>, one of `["beginning", "end"]`|No
-| <<plugins-{type}s-{plugin}-stat_interval>> |<<number,number>>|No
+| <<plugins-{type}s-{plugin}-stat_interval>> |<<number,number>> or <<string_duration,string duration>>|No
 |=======================================================================
 
 Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
@@ -178,18 +183,18 @@ input plugins.
 [id="plugins-{type}s-{plugin}-close_older"]
 ===== `close_older`
 
-* Value type is <<number,number>>
-* Default value is `
+* Value type is <<number,number>> or <<string_duration,string duration>>
+* Default value is `"1 hour"`
 
 The file input closes any files that were last read the specified
-
+duration (seconds if a number is specified) ago.
 This has different implications depending on if a file is being tailed or
 read. If tailing, and there is a large time gap in incoming data the file
 can be closed (allowing other files to be opened) but will be queued for
 reopening when new data is detected. If reading, the file will be closed
 after closed_older seconds from when the last bytes were read.
 This setting is retained for backward compatibility if you upgrade the
-plugin to
+plugin to 4.1.0+, are reading not tailing and do not switch to using Read mode.
 
 [id="plugins-{type}s-{plugin}-delimiter"]
 ===== `delimiter`
@@ -206,8 +211,10 @@ this setting is not used, instead the standard Windows or Unix line endings are
 * Value type is <<number,number>>
 * Default value is `15`
 
-How often
-`
+How often we expand the filename patterns in the `path` option to discover new files to watch.
+This value is a multiple of `stat_interval`, e.g. if `stat_interval` is "500 ms" then new
+files could be discovered every 15 x 500 milliseconds, i.e. every 7.5 seconds.
+In practice, this will be the best case because the time taken to read new content needs to be factored in.
 
 [id="plugins-{type}s-{plugin}-exclude"]
 ===== `exclude`
@@ -294,11 +301,11 @@ If you use special naming conventions for the file full paths then perhaps
 [id="plugins-{type}s-{plugin}-ignore_older"]
 ===== `ignore_older`
 
-* Value type is <<number,number>>
+* Value type is <<number,number>> or <<string_duration,string duration>>
 * There is no default value for this setting.
 
 When the file input discovers a file that was last modified
-before the specified
+before the specified duration (seconds if a number is specified), the file is ignored.
 After it's discovery, if an ignored file is modified it is no
 longer ignored and any new data is read. By default, this option is
 disabled. Note this unit is in seconds.
@@ -354,9 +361,9 @@ on the {logstash-ref}/configuration-file-structure.html#array[Logstash configura
 [id="plugins-{type}s-{plugin}-sincedb_clean_after"]
 ===== `sincedb_clean_after`
 
-* Value type is <<number,number>>
-* The default value for this setting is
-*
+* Value type is <<number,number>> or <<string_duration,string duration>>
+* The default value for this setting is "2 weeks".
+* If a number is specified then it is interpreted as *days* and can be decimal, e.g. 0.5 is 12 hours.
 
 The sincedb record now has a last active timestamp associated with it.
 If no changes are detected in a tracked file in the last N days its sincedb
@@ -378,8 +385,8 @@ NOTE: it must be a file path and not a directory path
 [id="plugins-{type}s-{plugin}-sincedb_write_interval"]
 ===== `sincedb_write_interval`
 
-* Value type is <<number,number>>
-* Default value is `15`
+* Value type is <<number,number>> or <<string_duration,string duration>>
+* Default value is `"15 seconds"`
 
 How often (in seconds) to write a since database with the current position of
 monitored log files.
@@ -404,15 +411,56 @@ position recorded in the sincedb file will be used.
 [id="plugins-{type}s-{plugin}-stat_interval"]
 ===== `stat_interval`
 
-* Value type is <<number,number>>
-* Default value is `1`
+* Value type is <<number,number>> or <<string_duration,string duration>>
+* Default value is `"1 second"`
 
 How often (in seconds) we stat files to see if they have been modified.
 Increasing this interval will decrease the number of system calls we make,
 but increase the time to detect new log lines.
+[NOTE]
+Discovering new files and checking whether they have grown or shrunk occurs in a loop.
+This loop will sleep for `stat_interval` seconds before looping again. However, if files
+have grown, the new content is read and lines are enqueued.
+Reading and enqueuing across all grown files can take time, especially if
+the pipeline is congested. So the overall loop time is a combination of the
+`stat_interval` and the file read time.
 
 [id="plugins-{type}s-{plugin}-common-options"]
 include::{include_path}/{type}.asciidoc[]
 
 :default_codec!:
 
+[id="string_duration"]
+// Move this to the includes when we make string durations available generally.
+==== String Durations
+
+Format is `number` `string` and the space between these is optional.
+So "45s" and "45 s" are both valid.
+[TIP]
+Use the most suitable duration, for example, "3 days" rather than "72 hours".
+
+===== Weeks
+Supported values: `w` `week` `weeks`, e.g. "2 w", "1 week", "4 weeks".
+
+===== Days
+Supported values: `d` `day` `days`, e.g. "2 d", "1 day", "2.5 days".
+
+===== Hours
+Supported values: `h` `hour` `hours`, e.g. "4 h", "1 hour", "0.5 hours".
+
+===== Minutes
+Supported values: `m` `min` `minute` `minutes`, e.g. "45 m", "35 min", "1 minute", "6 minutes".
+
+===== Seconds
+Supported values: `s` `sec` `second` `seconds`, e.g. "45 s", "15 sec", "1 second", "2.5 seconds".
+
+===== Milliseconds
+Supported values: `ms` `msec` `msecs`, e.g. "500 ms", "750 msec", "50 msecs".
+[NOTE]
+`milli` `millis` and `milliseconds` are not supported
+
+===== Microseconds
+Supported values: `us` `usec` `usecs`, e.g. "600 us", "800 usec", "900 usecs"
+[NOTE]
+`micro` `micros` and `microseconds` are not supported
+
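To make the `discover_interval` note above concrete, the best-case discovery period is simply the multiple times the (already coerced) `stat_interval`. A tiny Ruby sketch with illustrative variable names (not plugin internals) that reproduces the 15 x 500 ms example from the docs:

    # Illustrative only: reproduces the discover_interval example from the docs above.
    stat_interval_seconds = 0.5 # "500 ms" after string-duration coercion
    discover_multiple     = 15  # discover_interval acts as a multiple of stat_interval

    best_case = discover_multiple * stat_interval_seconds
    puts "new files discovered at best every #{best_case} seconds" # => 7.5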
data/lib/filewatch/read_mode/handlers/base.rb
CHANGED
@@ -7,12 +7,17 @@ module FileWatch module ReadMode module Handlers
 
     attr_reader :sincedb_collection
 
-    def initialize(sincedb_collection, observer, settings)
+    def initialize(processor, sincedb_collection, observer, settings)
       @settings = settings
+      @processor = processor
       @sincedb_collection = sincedb_collection
       @observer = observer
     end
 
+    def quit?
+      @processor.watch.quit?
+    end
+
     def handle(watched_file)
       logger.debug("handling: #{watched_file.path}")
       unless watched_file.has_listener?
data/lib/filewatch/read_mode/handlers/read_file.rb
CHANGED
@@ -5,13 +5,12 @@ module FileWatch module ReadMode module Handlers
     def handle_specifically(watched_file)
       if open_file(watched_file)
         add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
-        changed = false
         @settings.file_chunk_count.times do
+          break if quit?
           begin
             data = watched_file.file_read(@settings.file_chunk_size)
             result = watched_file.buffer_extract(data) # expect BufferExtractResult
             logger.info(result.warning, result.additional) unless result.warning.empty?
-            changed = true
             result.lines.each do |line|
               watched_file.listener.accept(line)
               # sincedb position is independent from the watched_file bytes_read
@@ -20,6 +19,7 @@ module FileWatch module ReadMode module Handlers
             # instead of tracking the bytes_read line by line we need to track by the data read size.
             # because we initially seek to the bytes_read not the sincedb position
             watched_file.increment_bytes_read(data.bytesize)
+            sincedb_collection.request_disk_flush
           rescue EOFError
             # flush the buffer now in case there is no final delimiter
             line = watched_file.buffer.flush
@@ -40,7 +40,6 @@ module FileWatch module ReadMode module Handlers
             break
           end
         end
-        sincedb_collection.request_disk_flush if changed
       end
     end
   end
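The two hunks above move the flush request inside the chunk loop and bail out of the loop when shutdown is requested. A minimal standalone sketch of that per-chunk pattern; `read_chunks`, `FakeSincedb` and the hard-coded chunk count are illustrative, not plugin code:

    require "stringio"

    # Per-chunk flow mirroring the diff above: check for shutdown before each chunk,
    # and request a sincedb flush after every chunk instead of once at end-of-file.
    def read_chunks(io, sincedb, chunk_count: 4, chunk_size: 32 * 1024, quit: -> { false })
      chunk_count.times do
        break if quit.call            # don't start another chunk during shutdown
        data = io.sysread(chunk_size) # raises EOFError when the file is exhausted
        yield data                    # stand-in for buffer_extract / listener.accept
        sincedb.request_disk_flush    # flush request per chunk (interval-gated elsewhere)
      end
    rescue EOFError
      # end of file reached: the real handler flushes its buffer and marks completion
    end

    # Stand-in sincedb collection that just counts flush requests.
    FakeSincedb = Struct.new(:flush_requests) do
      def request_disk_flush
        self.flush_requests += 1
      end
    end

    sincedb = FakeSincedb.new(0)
    read_chunks(StringIO.new("x" * (64 * 1024)), sincedb) { |_chunk| }
    puts sincedb.flush_requests # => 2 (one request per 32KB chunk of the 64KB "file")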
data/lib/filewatch/read_mode/handlers/read_zip_file.rb
CHANGED
@@ -13,10 +13,6 @@ module FileWatch module ReadMode module Handlers
       add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
       # can't really stripe read a zip file, its all or nothing.
       watched_file.listener.opened
-      # what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading)
-      # should we track lines read in the sincedb and
-      # fast forward through the lines until we reach unseen content?
-      # meaning that we can quit in the middle of a zip file
       begin
         file_stream = FileInputStream.new(watched_file.path)
         gzip_stream = GZIPInputStream.new(file_stream)
@@ -24,14 +20,19 @@ module FileWatch module ReadMode module Handlers
         buffered = BufferedReader.new(decoder)
         while (line = buffered.readLine(false))
           watched_file.listener.accept(line)
+          # can't quit, if we did then we would incorrectly write a 'completed' sincedb entry
+          # what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading)
+          # should we track lines read in the sincedb and
+          # fast forward through the lines until we reach unseen content?
+          # meaning that we can quit in the middle of a zip file
         end
         watched_file.listener.eof
       rescue ZipException => e
         logger.error("Cannot decompress the gzip file at path: #{watched_file.path}")
         watched_file.listener.error
       else
-
-        sincedb_collection.
+        watched_file.update_bytes_read(watched_file.last_stat_size)
+        sincedb_collection.unset_watched_file(watched_file)
         watched_file.listener.deleted
         watched_file.unwatch
       ensure
data/lib/filewatch/read_mode/processor.rb
CHANGED
@@ -25,8 +25,10 @@ module FileWatch module ReadMode
     end
 
     def initialize_handlers(sincedb_collection, observer)
-
-
+      # we deviate from the tail mode handler initialization here
+      # by adding a reference to self so we can read the quit flag during a (depth first) read loop
+      @read_file = Handlers::ReadFile.new(self, sincedb_collection, observer, @settings)
+      @read_zip_file = Handlers::ReadZipFile.new(self, sincedb_collection, observer, @settings)
     end
 
     def read_file(watched_file)
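Taken together with the handler changes above, the processor now passes itself into each handler so the handler can consult the watch's quit flag mid-read. A stripped-down sketch of that wiring, using stand-in classes (Watch, Handler, Processor here are illustrative, not the plugin's classes):

    # The handler keeps a reference back to its processor and delegates quit?
    # to the processor's watch, mirroring the diffs above.
    class Watch
      def initialize; @quit = false; end
      def quit!; @quit = true; end
      def quit?; @quit; end
    end

    class Handler
      def initialize(processor)
        @processor = processor
      end

      def quit?
        @processor.watch.quit?
      end
    end

    class Processor
      attr_reader :watch

      def initialize(watch)
        @watch = watch
        @handler = Handler.new(self) # pass self so the handler can see the quit flag
      end

      def handler_sees_quit?
        @handler.quit?
      end
    end

    watch = Watch.new
    processor = Processor.new(watch)
    watch.quit!
    puts processor.handler_sees_quit? # => true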
data/lib/filewatch/sincedb_collection.rb
CHANGED
@@ -20,14 +20,21 @@ module FileWatch
       @write_method = LogStash::Environment.windows? || @path.chardev? || @path.blockdev? ? method(:non_atomic_write) : method(:atomic_write)
       @full_path = @path.to_path
       FileUtils.touch(@full_path)
+      @write_requested = false
+    end
+
+    def write_requested?
+      @write_requested
     end
 
     def request_disk_flush
-
-
-
-
-
+      @write_requested = true
+      flush_at_interval
+    end
+
+    def write_if_requested
+      if write_requested?
+        flush_at_interval
       end
     end
 
@@ -51,7 +58,6 @@ module FileWatch
        #No existing sincedb to load
        logger.debug("open: error: #{path}: #{e.inspect}")
       end
-
     end
 
     def associate(watched_file)
@@ -130,10 +136,6 @@ module FileWatch
       @sincedb[key].update_position(0)
     end
 
-    def store_last_read(key, last_read)
-      @sincedb[key].update_position(last_read)
-    end
-
    def increment(key, amount)
       @sincedb[key].increment_position(amount)
     end
@@ -167,6 +169,15 @@ module FileWatch
 
     private
 
+    def flush_at_interval
+      now = Time.now.to_i
+      delta = now - @sincedb_last_write
+      if delta >= @settings.sincedb_write_interval
+        logger.debug("writing sincedb (delta since last write = #{delta})")
+        sincedb_write(now)
+      end
+    end
+
     def handle_association(sincedb_value, watched_file)
       watched_file.update_bytes_read(sincedb_value.position)
       sincedb_value.set_watched_file(watched_file)
@@ -193,6 +204,7 @@ module FileWatch
         logger.debug("sincedb_write: cleaned", "key" => "'#{key}'")
       end
       @sincedb_last_write = time
+      @write_requested = false
     rescue Errno::EACCES
       # no file handles free perhaps
       # maybe it will work next time
data/lib/filewatch/watch.rb
CHANGED

data/lib/jars/filewatch-1.0.0.jar
CHANGED
Binary file
data/lib/logstash/inputs/file.rb
CHANGED
@@ -11,6 +11,7 @@ require_relative "file/patch"
 require_relative "file_listener"
 require_relative "delete_completed_file_handler"
 require_relative "log_completed_file_handler"
+require_relative "friendly_durations"
 require "filewatch/bootstrap"
 
 # Stream events from files, normally by tailing them in a manner
@@ -109,7 +110,7 @@ class File < LogStash::Inputs::Base
   # How often (in seconds) we stat files to see if they have been modified.
   # Increasing this interval will decrease the number of system calls we make,
   # but increase the time to detect new log lines.
-  config :stat_interval, :validate =>
+  config :stat_interval, :validate => [FriendlyDurations, "seconds"], :default => 1
 
   # How often (in seconds) we expand the filename patterns in the
   # `path` option to discover new files to watch.
@@ -123,7 +124,7 @@ class File < LogStash::Inputs::Base
 
   # How often (in seconds) to write a since database with the current position of
   # monitored log files.
-  config :sincedb_write_interval, :validate =>
+  config :sincedb_write_interval, :validate => [FriendlyDurations, "seconds"], :default => 15
 
   # Choose where Logstash starts initially reading files: at the beginning or
   # at the end. The default behavior treats files like live streams and thus
@@ -145,7 +146,7 @@ class File < LogStash::Inputs::Base
   # After its discovery, if an ignored file is modified it is no
   # longer ignored and any new data is read. By default, this option is
   # disabled. Note this unit is in seconds.
-  config :ignore_older, :validate =>
+  config :ignore_older, :validate => [FriendlyDurations, "seconds"]
 
   # The file input closes any files that were last read the specified
   # timespan in seconds ago.
@@ -154,7 +155,7 @@ class File < LogStash::Inputs::Base
   # reopening when new data is detected. If reading, the file will be closed
   # after closed_older seconds from when the last bytes were read.
   # The default is 1 hour
-  config :close_older, :validate =>
+  config :close_older, :validate => [FriendlyDurations, "seconds"], :default => "1 hour"
 
   # What is the maximum number of file_handles that this input consumes
   # at any one time. Use close_older to close some files if you need to
@@ -191,7 +192,7 @@ class File < LogStash::Inputs::Base
   # If no changes are detected in tracked files in the last N days their sincedb
   # tracking record will expire and not be persisted.
   # This option protects against the well known inode recycling problem. (add reference)
-  config :sincedb_clean_after, :validate =>
+  config :sincedb_clean_after, :validate => [FriendlyDurations, "days"], :default => "14 days" # days
 
   # File content is read off disk in blocks or chunks, then using whatever the set delimiter
   # is, lines are extracted from the chunk. Specify the size in bytes of each chunk.
@@ -222,6 +223,20 @@ class File < LogStash::Inputs::Base
   config :file_sort_direction, :validate => ["asc", "desc"], :default => "asc"
 
   public
+
+  class << self
+    alias_method :old_validate_value, :validate_value
+
+    def validate_value(value, validator)
+      if validator.is_a?(Array) && validator.size == 2 && validator.first.respond_to?(:call)
+        callable, units = *validator
+        # returns a ValidatedStruct having a `to_a` method suitable to return to the config mixin caller
+        return callable.call(value, units).to_a
+      end
+      old_validate_value(value, validator)
+    end
+  end
+
   def register
     require "addressable/uri"
     require "digest/md5"
@@ -316,6 +331,7 @@ class File < LogStash::Inputs::Base
     start_processing
     @queue = queue
     @watcher.subscribe(self) # halts here until quit is called
+    # last action of the subscribe call is to write the sincedb
     exit_flush
   end # def run
 
@@ -338,9 +354,6 @@ class File < LogStash::Inputs::Base
   end
 
   def stop
-    # in filewatch >= 0.6.7, quit will closes and forget all files
-    # but it will write their last read positions to since_db
-    # beforehand
     if @watcher
       @codec.close
       @watcher.quit
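The `class << self` patch above teaches the config layer to accept a `[callable, units]` pair as a validator. A standalone illustration of that dispatch logic follows; `DurationValidator` and the trailing fallback line are stand-ins, while the real method chains to Logstash's config mixin via the aliased `old_validate_value`:

    # When the validator is a two-element array whose first element responds to #call,
    # delegate to it and return its [success, value-or-error] pair; otherwise fall back.
    ValidatedStruct = Struct.new(:value, :error_message) do
      def to_a
        error_message.nil? ? [true, value] : [false, error_message]
      end
    end

    module DurationValidator
      def self.call(value, _units)
        ValidatedStruct.new(value.to_f, nil) # pretend every value parses cleanly
      end
    end

    def validate_value(value, validator)
      if validator.is_a?(Array) && validator.size == 2 && validator.first.respond_to?(:call)
        callable, units = *validator
        return callable.call(value, units).to_a
      end
      [true, value] # stand-in for the original (aliased) validate_value
    end

    p validate_value("15", [DurationValidator, "seconds"]) # => [true, 15.0]
    p validate_value("tail", ["tail", "read"])              # => [true, "tail"] (plain array validators still take the old path)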
data/lib/logstash/inputs/friendly_durations.rb
ADDED
@@ -0,0 +1,45 @@
+# encoding: utf-8
+
+module LogStash module Inputs
+  module FriendlyDurations
+    NUMBERS_RE = /^(?<number>\d+(\.\d+)?)\s?(?<units>s((ec)?(ond)?)(s)?|m((in)?(ute)?)(s)?|h(our)?(s)?|d(ay)?(s)?|w(eek)?(s)?|us(ec)?(s)?|ms(ec)?(s)?)?$/
+    HOURS = 3600
+    DAYS = 24 * HOURS
+    MEGA = 10**6
+    KILO = 10**3
+
+    ValidatedStruct = Struct.new(:value, :error_message) do
+      def to_a
+        error_message.nil? ? [true, value] : [false, error_message]
+      end
+    end
+
+    def self.call(value, unit = "sec")
+      # coerce into seconds
+      val_string = value.to_s.strip
+      matched = NUMBERS_RE.match(val_string)
+      if matched.nil?
+        failed_message = "Value '#{val_string}' is not a valid duration string e.g. 200 usec, 250ms, 60 sec, 18h, 21.5d, 1 day, 2w, 6 weeks"
+        return ValidatedStruct.new(nil, failed_message)
+      end
+      multiplier = matched[:units] || unit
+      numeric = matched[:number].to_f
+      case multiplier
+      when "m","min","mins","minute","minutes"
+        ValidatedStruct.new(numeric * 60, nil)
+      when "h","hour","hours"
+        ValidatedStruct.new(numeric * HOURS, nil)
+      when "d","day","days"
+        ValidatedStruct.new(numeric * DAYS, nil)
+      when "w","week","weeks"
+        ValidatedStruct.new(numeric * 7 * DAYS, nil)
+      when "ms","msec","msecs"
+        ValidatedStruct.new(numeric / KILO, nil)
+      when "us","usec","usecs"
+        ValidatedStruct.new(numeric / MEGA, nil)
+      else
+        ValidatedStruct.new(numeric, nil)
+      end
+    end
+  end
+end end
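For reference, the new module above is driven like this; the expected values follow directly from the spec file further down (the snippet assumes the gem's `lib` directory is on the Ruby load path):

    require "logstash/inputs/friendly_durations"

    # Each call returns a ValidatedStruct; #to_a yields [true, seconds] or [false, error].
    p LogStash::Inputs::FriendlyDurations.call("500 ms", "sec").to_a # => [true, 0.5]
    p LogStash::Inputs::FriendlyDurations.call("1 hour", "sec").to_a # => [true, 3600.0]
    p LogStash::Inputs::FriendlyDurations.call("30", "minutes").to_a # => [true, 1800.0] (unit override applies when no unit is given)
    p LogStash::Inputs::FriendlyDurations.call("foobar", "sec").error_message
    # => "Value 'foobar' is not a valid duration string e.g. 200 usec, 250ms, 60 sec, 18h, 21.5d, 1 day, 2w, 6 weeks"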
data/logstash-input-file.gemspec
CHANGED
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-file'
-  s.version = '4.1.2'
+  s.version = '4.1.3'
   s.licenses = ['Apache-2.0']
   s.summary = "Streams events from files"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
data/spec/filewatch/read_mode_handlers_read_file_spec.rb
ADDED
@@ -0,0 +1,40 @@
+# encoding: utf-8
+require_relative 'spec_helper'
+
+module FileWatch
+  describe ReadMode::Handlers::ReadFile do
+    let(:settings) do
+      Settings.from_options(
+        :sincedb_write_interval => 0,
+        :sincedb_path => File::NULL
+      )
+    end
+    let(:sdb_collection) { SincedbCollection.new(settings) }
+    let(:directory) { Pathname.new(FIXTURE_DIR) }
+    let(:pathname) { directory.join('uncompressed.log') }
+    let(:watched_file) { WatchedFile.new(pathname, pathname.stat, settings) }
+    let(:processor) { ReadMode::Processor.new(settings).add_watch(watch) }
+    let(:file) { DummyFileReader.new(settings.file_chunk_size, 2) }
+
+    context "simulate reading a 64KB file with a default chunk size of 32KB and a zero sincedb write interval" do
+      let(:watch) { double("watch", :quit? => false) }
+      it "calls 'sincedb_write' exactly 2 times" do
+        allow(FileOpener).to receive(:open).with(watched_file.path).and_return(file)
+        expect(sdb_collection).to receive(:sincedb_write).exactly(2).times
+        watched_file.activate
+        processor.initialize_handlers(sdb_collection, TestObserver.new)
+        processor.read_file(watched_file)
+      end
+    end
+
+    context "simulate reading a 64KB file with a default chunk size of 32KB and a zero sincedb write interval" do
+      let(:watch) { double("watch", :quit? => true) }
+      it "calls 'sincedb_write' exactly 0 times as shutdown is in progress" do
+        expect(sdb_collection).to receive(:sincedb_write).exactly(0).times
+        watched_file.activate
+        processor.initialize_handlers(sdb_collection, TestObserver.new)
+        processor.read_file(watched_file)
+      end
+    end
+  end
+end
data/spec/filewatch/spec_helper.rb
CHANGED
@@ -28,6 +28,32 @@ require 'filewatch/bootstrap'
 
 module FileWatch
 
+  class DummyFileReader
+    def initialize(read_size, iterations)
+      @read_size = read_size
+      @iterations = iterations
+      @closed = false
+      @accumulated = 0
+    end
+    def file_seek(*)
+    end
+    def close()
+      @closed = true
+    end
+    def closed?
+      @closed
+    end
+    def sysread(amount)
+      @accumulated += amount
+      if @accumulated > @read_size * @iterations
+        raise EOFError.new
+      end
+      string = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcde\n"
+      multiplier = amount / string.length
+      string * multiplier
+    end
+  end
+
   FIXTURE_DIR = File.join('spec', 'fixtures')
 
   def self.make_file_older(path, seconds)
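To make the intent of `DummyFileReader` concrete: with a 32KB chunk size and 2 iterations it serves exactly two full chunks of a repeated 128-byte line and then raises `EOFError`, which is what lets the spec above assert two sincedb writes for a simulated 64KB file. A small usage sketch (assumes the spec helper above has been loaded):

    chunk_size = 32 * 1024
    reader = FileWatch::DummyFileReader.new(chunk_size, 2)

    p reader.sysread(chunk_size).bytesize # => 32768 (the 128-byte line repeated 256 times)
    p reader.sysread(chunk_size).bytesize # => 32768
    begin
      reader.sysread(chunk_size)
    rescue EOFError
      puts "third read raises EOFError, ending the read loop"
    end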
data/spec/filewatch/tailing_spec.rb
CHANGED
@@ -76,7 +76,7 @@ module FileWatch
 
     context "when close_older is set" do
       let(:wait_before_quit) { 0.8 }
-      let(:opts) { super.merge(:close_older => 0.
+      let(:opts) { super.merge(:close_older => 0.15, :max_active => 1, :stat_interval => 0.1) }
       it "opens both files" do
         actions.activate
         tailing.watch_this(watch_dir)
data/spec/inputs/friendly_durations_spec.rb
ADDED
@@ -0,0 +1,71 @@
+# encoding: utf-8
+
+require "helpers/spec_helper"
+require "logstash/inputs/friendly_durations"
+
+describe "FriendlyDurations module function call" do
+  context "unacceptable strings" do
+    it "gives an error message for 'foobar'" do
+      result = LogStash::Inputs::FriendlyDurations.call("foobar","sec")
+      expect(result.error_message).to start_with("Value 'foobar' is not a valid duration string e.g. 200 usec")
+    end
+    it "gives an error message for '5 5 days'" do
+      result = LogStash::Inputs::FriendlyDurations.call("5 5 days","sec")
+      expect(result.error_message).to start_with("Value '5 5 days' is not a valid duration string e.g. 200 usec")
+    end
+  end
+
+  context "when a unit is not specified, a unit override will affect the result" do
+    it "coerces 14 to 1209600.0s as days" do
+      result = LogStash::Inputs::FriendlyDurations.call(14,"d")
+      expect(result.error_message).to eq(nil)
+      expect(result.value).to eq(1209600.0)
+    end
+    it "coerces '30' to 1800.0s as minutes" do
+      result = LogStash::Inputs::FriendlyDurations.call("30","minutes")
+      expect(result.to_a).to eq([true, 1800.0])
+    end
+  end
+
+  context "acceptable strings" do
+    [
+      ["10", 10.0],
+      ["10.5 s", 10.5],
+      ["10.75 secs", 10.75],
+      ["11 second", 11.0],
+      ["10 seconds", 10.0],
+      ["500 ms", 0.5],
+      ["750.9 msec", 0.7509],
+      ["750.9 msecs", 0.7509],
+      ["750.9 us", 0.0007509],
+      ["750.9 usec", 0.0007509],
+      ["750.9 usecs", 0.0007509],
+      ["1.5m", 90.0],
+      ["2.5 m", 150.0],
+      ["1.25 min", 75.0],
+      ["1 minute", 60.0],
+      ["2.5 minutes", 150.0],
+      ["2h", 7200.0],
+      ["2 h", 7200.0],
+      ["1 hour", 3600.0],
+      ["1hour", 3600.0],
+      ["3 hours", 10800.0],
+      ["0.5d", 43200.0],
+      ["1day", 86400.0],
+      ["1 day", 86400.0],
+      ["2days", 172800.0],
+      ["14 days", 1209600.0],
+      ["1w", 604800.0],
+      ["1 w", 604800.0],
+      ["1 week", 604800.0],
+      ["2weeks", 1209600.0],
+      ["2 weeks", 1209600.0],
+      ["1.5 weeks", 907200.0],
+    ].each do |input, coerced|
+      it "coerces #{input.inspect.rjust(16)} to #{coerced.inspect}" do
+        result = LogStash::Inputs::FriendlyDurations.call(input,"sec")
+        expect(result.to_a).to eq([true, coerced])
+      end
+    end
+  end
+end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-input-file
 version: !ruby/object:Gem::Version
-  version: 4.1.2
+  version: 4.1.3
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-06-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -204,9 +204,11 @@ files:
 - lib/logstash/inputs/file.rb
 - lib/logstash/inputs/file/patch.rb
 - lib/logstash/inputs/file_listener.rb
+- lib/logstash/inputs/friendly_durations.rb
 - lib/logstash/inputs/log_completed_file_handler.rb
 - logstash-input-file.gemspec
 - spec/filewatch/buftok_spec.rb
+- spec/filewatch/read_mode_handlers_read_file_spec.rb
 - spec/filewatch/reading_spec.rb
 - spec/filewatch/sincedb_record_serializer_spec.rb
 - spec/filewatch/spec_helper.rb
@@ -222,6 +224,7 @@ files:
 - spec/helpers/spec_helper.rb
 - spec/inputs/file_read_spec.rb
 - spec/inputs/file_tail_spec.rb
+- spec/inputs/friendly_durations_spec.rb
 homepage: http://www.elastic.co/guide/en/logstash/current/index.html
 licenses:
 - Apache-2.0
@@ -250,6 +253,7 @@ specification_version: 4
 summary: Streams events from files
 test_files:
 - spec/filewatch/buftok_spec.rb
+- spec/filewatch/read_mode_handlers_read_file_spec.rb
 - spec/filewatch/reading_spec.rb
 - spec/filewatch/sincedb_record_serializer_spec.rb
 - spec/filewatch/spec_helper.rb
@@ -265,3 +269,4 @@ test_files:
 - spec/helpers/spec_helper.rb
 - spec/inputs/file_read_spec.rb
 - spec/inputs/file_tail_spec.rb
+- spec/inputs/friendly_durations_spec.rb