logstash-input-file 4.1.2 → 4.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/docs/index.asciidoc +68 -20
- data/lib/filewatch/observing_read.rb +0 -1
- data/lib/filewatch/read_mode/handlers/base.rb +6 -1
- data/lib/filewatch/read_mode/handlers/read_file.rb +2 -3
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +7 -6
- data/lib/filewatch/read_mode/processor.rb +4 -2
- data/lib/filewatch/sincedb_collection.rb +22 -10
- data/lib/filewatch/watch.rb +1 -0
- data/lib/jars/filewatch-1.0.0.jar +0 -0
- data/lib/logstash/inputs/file.rb +21 -8
- data/lib/logstash/inputs/friendly_durations.rb +45 -0
- data/logstash-input-file.gemspec +1 -1
- data/spec/filewatch/read_mode_handlers_read_file_spec.rb +40 -0
- data/spec/filewatch/spec_helper.rb +26 -0
- data/spec/filewatch/tailing_spec.rb +1 -1
- data/spec/inputs/friendly_durations_spec.rb +71 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44f11a07375e6cf964220179c9cea4259a9bb6829dbbdda85e05fef737a9e214
|
4
|
+
data.tar.gz: 9005ada3317a3d947bce138f9bc014647f79ad871db7cfd373642f51f94ca8bc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 772fedf54c74b08d660f2a6aca1dd43f9ab04cb9290ef67d409e26419931bcf0fa26de027c075d561f35f40d55ac8b43c376c7b7c88f1f7982f1eb243361c7a3
|
7
|
+
data.tar.gz: dc5dc4aa91b870368967f92fc4129faa4e2b470e0bd942dda965711fab90def3658e9efd8f9158f3f98193a5aedcf23322bf65c7f5a28a4c32b84d6aac370118
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 4.1.3
|
2
|
+
- Fixed `read` mode of regular files: a sincedb write is now requested in each read loop
|
3
|
+
iteration rather than waiting for the end-of-file to be reached. Note: for gz files,
|
4
|
+
the sincedb entry can only be updated at the end of the file as it is not possible
|
5
|
+
to seek into a compressed file and begin reading from that position.
|
6
|
+
[#196](https://github.com/logstash-plugins/logstash-input-file/pull/196)
|
7
|
+
- Added support for String Durations in some settings e.g. `stat_interval => "750 ms"`
|
8
|
+
[#194](https://github.com/logstash-plugins/logstash-input-file/pull/194)
|
9
|
+
|
1
10
|
## 4.1.2
|
2
11
|
- Fix `require winhelper` error in WINDOWS.
|
3
12
|
[Issue #184](https://github.com/logstash-plugins/logstash-input-file/issues/184)
|
data/docs/index.asciidoc
CHANGED
@@ -146,10 +146,15 @@ will not get picked up.
|
|
146
146
|
|
147
147
|
This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
|
148
148
|
|
149
|
+
[NOTE]
|
150
|
+
Duration settings can be specified in text form e.g. "250 ms", this string will be converted into
|
151
|
+
decimal seconds. There are quite a few supported natural and abbreviated durations,
|
152
|
+
see <<string_duration,string duration>> for the details.
|
153
|
+
|
149
154
|
[cols="<,<,<",options="header",]
|
150
155
|
|=======================================================================
|
151
156
|
|Setting |Input type|Required
|
152
|
-
| <<plugins-{type}s-{plugin}-close_older>> |<<number,number>>|No
|
157
|
+
| <<plugins-{type}s-{plugin}-close_older>> |<<number,number>> or <<string_duration,string duration>>|No
|
153
158
|
| <<plugins-{type}s-{plugin}-delimiter>> |<<string,string>>|No
|
154
159
|
| <<plugins-{type}s-{plugin}-discover_interval>> |<<number,number>>|No
|
155
160
|
| <<plugins-{type}s-{plugin}-exclude>> |<<array,array>>|No
|
@@ -159,15 +164,15 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
159
164
|
| <<plugins-{type}s-{plugin}-file_completed_log_path>> |<<string,string>>|No
|
160
165
|
| <<plugins-{type}s-{plugin}-file_sort_by>> |<<string,string>>, one of `["last_modified", "path"]`|No
|
161
166
|
| <<plugins-{type}s-{plugin}-file_sort_direction>> |<<string,string>>, one of `["asc", "desc"]`|No
|
162
|
-
| <<plugins-{type}s-{plugin}-ignore_older>> |<<number,number>>|No
|
167
|
+
| <<plugins-{type}s-{plugin}-ignore_older>> |<<number,number>> or <<string_duration,string duration>>|No
|
163
168
|
| <<plugins-{type}s-{plugin}-max_open_files>> |<<number,number>>|No
|
164
169
|
| <<plugins-{type}s-{plugin}-mode>> |<<string,string>>, one of `["tail", "read"]`|No
|
165
170
|
| <<plugins-{type}s-{plugin}-path>> |<<array,array>>|Yes
|
166
|
-
| <<plugins-{type}s-{plugin}-sincedb_clean_after>> |<<number,number>>|No
|
171
|
+
| <<plugins-{type}s-{plugin}-sincedb_clean_after>> |<<number,number>> or <<string_duration,string duration>>|No
|
167
172
|
| <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
|
168
|
-
| <<plugins-{type}s-{plugin}-sincedb_write_interval>> |<<number,number>>|No
|
173
|
+
| <<plugins-{type}s-{plugin}-sincedb_write_interval>> |<<number,number>> or <<string_duration,string duration>>|No
|
169
174
|
| <<plugins-{type}s-{plugin}-start_position>> |<<string,string>>, one of `["beginning", "end"]`|No
|
170
|
-
| <<plugins-{type}s-{plugin}-stat_interval>> |<<number,number>>|No
|
175
|
+
| <<plugins-{type}s-{plugin}-stat_interval>> |<<number,number>> or <<string_duration,string duration>>|No
|
171
176
|
|=======================================================================
|
172
177
|
|
173
178
|
Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
|
@@ -178,18 +183,18 @@ input plugins.
|
|
178
183
|
[id="plugins-{type}s-{plugin}-close_older"]
|
179
184
|
===== `close_older`
|
180
185
|
|
181
|
-
* Value type is <<number,number>>
|
182
|
-
* Default value is `
|
186
|
+
* Value type is <<number,number>> or <<string_duration,string duration>>
|
187
|
+
* Default value is `"1 hour"`
|
183
188
|
|
184
189
|
The file input closes any files that were last read the specified
|
185
|
-
|
190
|
+
duration (seconds if a number is specified) ago.
|
186
191
|
This has different implications depending on if a file is being tailed or
|
187
192
|
read. If tailing, and there is a large time gap in incoming data the file
|
188
193
|
can be closed (allowing other files to be opened) but will be queued for
|
189
194
|
reopening when new data is detected. If reading, the file will be closed
|
190
195
|
after `close_older` seconds from when the last bytes were read.
|
191
196
|
This setting is retained for backward compatibility if you upgrade the
|
192
|
-
plugin to
|
197
|
+
plugin to 4.1.0+, are reading not tailing and do not switch to using Read mode.
|
193
198
|
|
194
199
|
[id="plugins-{type}s-{plugin}-delimiter"]
|
195
200
|
===== `delimiter`
|
@@ -206,8 +211,10 @@ this setting is not used, instead the standard Windows or Unix line endings are
|
|
206
211
|
* Value type is <<number,number>>
|
207
212
|
* Default value is `15`
|
208
213
|
|
209
|
-
How often
|
210
|
-
`
|
214
|
+
How often we expand the filename patterns in the `path` option to discover new files to watch.
|
215
|
+
This value is a multiple of `stat_interval`, e.g. if `stat_interval` is "500 ms" then new files
|
216
|
+
could be discovered every 15 X 500 milliseconds - 7.5 seconds.
|
217
|
+
In practice, this will be the best case because the time taken to read new content needs to be factored in.
|
211
218
|
|
212
219
|
[id="plugins-{type}s-{plugin}-exclude"]
|
213
220
|
===== `exclude`
|
@@ -294,11 +301,11 @@ If you use special naming conventions for the file full paths then perhaps
|
|
294
301
|
[id="plugins-{type}s-{plugin}-ignore_older"]
|
295
302
|
===== `ignore_older`
|
296
303
|
|
297
|
-
* Value type is <<number,number>>
|
304
|
+
* Value type is <<number,number>> or <<string_duration,string duration>>
|
298
305
|
* There is no default value for this setting.
|
299
306
|
|
300
307
|
When the file input discovers a file that was last modified
|
301
|
-
before the specified
|
308
|
+
before the specified duration (seconds if a number is specified), the file is ignored.
|
302
309
|
After its discovery, if an ignored file is modified it is no
|
303
310
|
longer ignored and any new data is read. By default, this option is
|
304
311
|
disabled. Note this unit is in seconds.
|
@@ -354,9 +361,9 @@ on the {logstash-ref}/configuration-file-structure.html#array[Logstash configura
|
|
354
361
|
[id="plugins-{type}s-{plugin}-sincedb_clean_after"]
|
355
362
|
===== `sincedb_clean_after`
|
356
363
|
|
357
|
-
* Value type is <<number,number>>
|
358
|
-
* The default value for this setting is
|
359
|
-
*
|
364
|
+
* Value type is <<number,number>> or <<string_duration,string duration>>
|
365
|
+
* The default value for this setting is "2 weeks".
|
366
|
+
* If a number is specified then it is interpreted as *days* and can be decimal e.g. 0.5 is 12 hours.
|
360
367
|
|
361
368
|
The sincedb record now has a last active timestamp associated with it.
|
362
369
|
If no changes are detected in a tracked file in the last N days its sincedb
|
@@ -378,8 +385,8 @@ NOTE: it must be a file path and not a directory path
|
|
378
385
|
[id="plugins-{type}s-{plugin}-sincedb_write_interval"]
|
379
386
|
===== `sincedb_write_interval`
|
380
387
|
|
381
|
-
* Value type is <<number,number>>
|
382
|
-
* Default value is `15`
|
388
|
+
* Value type is <<number,number>> or <<string_duration,string duration>>
|
389
|
+
* Default value is `"15 seconds"`
|
383
390
|
|
384
391
|
How often (in seconds) to write a since database with the current position of
|
385
392
|
monitored log files.
|
@@ -404,15 +411,56 @@ position recorded in the sincedb file will be used.
|
|
404
411
|
[id="plugins-{type}s-{plugin}-stat_interval"]
|
405
412
|
===== `stat_interval`
|
406
413
|
|
407
|
-
* Value type is <<number,number>>
|
408
|
-
* Default value is `1`
|
414
|
+
* Value type is <<number,number>> or <<string_duration,string duration>>
|
415
|
+
* Default value is `"1 second"`
|
409
416
|
|
410
417
|
How often (in seconds) we stat files to see if they have been modified.
|
411
418
|
Increasing this interval will decrease the number of system calls we make,
|
412
419
|
but increase the time to detect new log lines.
|
420
|
+
[NOTE]
|
421
|
+
Discovering new files and checking whether they have grown or shrunk occurs in a loop.
|
422
|
+
This loop will sleep for `stat_interval` seconds before looping again. However, if files
|
423
|
+
have grown, the new content is read and lines are enqueued.
|
424
|
+
Reading and enqueuing across all grown files can take time, especially if
|
425
|
+
the pipeline is congested. So the overall loop time is a combination of the
|
426
|
+
`stat_interval` and the file read time.
|
413
427
|
|
414
428
|
[id="plugins-{type}s-{plugin}-common-options"]
|
415
429
|
include::{include_path}/{type}.asciidoc[]
|
416
430
|
|
417
431
|
:default_codec!:
|
418
432
|
|
433
|
+
[id="string_duration"]
|
434
|
+
// Move this to the includes when we make string durations available generally.
|
435
|
+
==== String Durations
|
436
|
+
|
437
|
+
Format is `number` `string` and the space between these is optional.
|
438
|
+
So "45s" and "45 s" are both valid.
|
439
|
+
[TIP]
|
440
|
+
Use the most suitable duration, for example, "3 days" rather than "72 hours".
|
441
|
+
|
442
|
+
===== Weeks
|
443
|
+
Supported values: `w` `week` `weeks`, e.g. "2 w", "1 week", "4 weeks".
|
444
|
+
|
445
|
+
===== Days
|
446
|
+
Supported values: `d` `day` `days`, e.g. "2 d", "1 day", "2.5 days".
|
447
|
+
|
448
|
+
===== Hours
|
449
|
+
Supported values: `h` `hour` `hours`, e.g. "4 h", "1 hour", "0.5 hours".
|
450
|
+
|
451
|
+
===== Minutes
|
452
|
+
Supported values: `m` `min` `minute` `minutes`, e.g. "45 m", "35 min", "1 minute", "6 minutes".
|
453
|
+
|
454
|
+
===== Seconds
|
455
|
+
Supported values: `s` `sec` `second` `seconds`, e.g. "45 s", "15 sec", "1 second", "2.5 seconds".
|
456
|
+
|
457
|
+
===== Milliseconds
|
458
|
+
Supported values: `ms` `msec` `msecs`, e.g. "500 ms", "750 msec", "50 msecs"
|
459
|
+
[NOTE]
|
460
|
+
`milli` `millis` and `milliseconds` are not supported
|
461
|
+
|
462
|
+
===== Microseconds
|
463
|
+
Supported values: `us` `usec` `usecs`, e.g. "600 us", "800 usec", "900 usecs"
|
464
|
+
[NOTE]
|
465
|
+
`micro` `micros` and `microseconds` are not supported
|
466
|
+
|
@@ -7,12 +7,17 @@ module FileWatch module ReadMode module Handlers
|
|
7
7
|
|
8
8
|
attr_reader :sincedb_collection
|
9
9
|
|
10
|
-
def initialize(sincedb_collection, observer, settings)
|
10
|
+
def initialize(processor, sincedb_collection, observer, settings)
|
11
11
|
@settings = settings
|
12
|
+
@processor = processor
|
12
13
|
@sincedb_collection = sincedb_collection
|
13
14
|
@observer = observer
|
14
15
|
end
|
15
16
|
|
17
|
+
def quit?
|
18
|
+
@processor.watch.quit?
|
19
|
+
end
|
20
|
+
|
16
21
|
def handle(watched_file)
|
17
22
|
logger.debug("handling: #{watched_file.path}")
|
18
23
|
unless watched_file.has_listener?
|
@@ -5,13 +5,12 @@ module FileWatch module ReadMode module Handlers
|
|
5
5
|
def handle_specifically(watched_file)
|
6
6
|
if open_file(watched_file)
|
7
7
|
add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
|
8
|
-
changed = false
|
9
8
|
@settings.file_chunk_count.times do
|
9
|
+
break if quit?
|
10
10
|
begin
|
11
11
|
data = watched_file.file_read(@settings.file_chunk_size)
|
12
12
|
result = watched_file.buffer_extract(data) # expect BufferExtractResult
|
13
13
|
logger.info(result.warning, result.additional) unless result.warning.empty?
|
14
|
-
changed = true
|
15
14
|
result.lines.each do |line|
|
16
15
|
watched_file.listener.accept(line)
|
17
16
|
# sincedb position is independent from the watched_file bytes_read
|
@@ -20,6 +19,7 @@ module FileWatch module ReadMode module Handlers
|
|
20
19
|
# instead of tracking the bytes_read line by line we need to track by the data read size.
|
21
20
|
# because we initially seek to the bytes_read not the sincedb position
|
22
21
|
watched_file.increment_bytes_read(data.bytesize)
|
22
|
+
sincedb_collection.request_disk_flush
|
23
23
|
rescue EOFError
|
24
24
|
# flush the buffer now in case there is no final delimiter
|
25
25
|
line = watched_file.buffer.flush
|
@@ -40,7 +40,6 @@ module FileWatch module ReadMode module Handlers
|
|
40
40
|
break
|
41
41
|
end
|
42
42
|
end
|
43
|
-
sincedb_collection.request_disk_flush if changed
|
44
43
|
end
|
45
44
|
end
|
46
45
|
end
|
@@ -13,10 +13,6 @@ module FileWatch module ReadMode module Handlers
|
|
13
13
|
add_or_update_sincedb_collection(watched_file) unless sincedb_collection.member?(watched_file.sincedb_key)
|
14
14
|
# can't really stripe read a zip file, its all or nothing.
|
15
15
|
watched_file.listener.opened
|
16
|
-
# what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading)
|
17
|
-
# should we track lines read in the sincedb and
|
18
|
-
# fast forward through the lines until we reach unseen content?
|
19
|
-
# meaning that we can quit in the middle of a zip file
|
20
16
|
begin
|
21
17
|
file_stream = FileInputStream.new(watched_file.path)
|
22
18
|
gzip_stream = GZIPInputStream.new(file_stream)
|
@@ -24,14 +20,19 @@ module FileWatch module ReadMode module Handlers
|
|
24
20
|
buffered = BufferedReader.new(decoder)
|
25
21
|
while (line = buffered.readLine(false))
|
26
22
|
watched_file.listener.accept(line)
|
23
|
+
# can't quit, if we did then we would incorrectly write a 'completed' sincedb entry
|
24
|
+
# what do we do about quit when we have just begun reading the zipped file (e.g. pipeline reloading)
|
25
|
+
# should we track lines read in the sincedb and
|
26
|
+
# fast forward through the lines until we reach unseen content?
|
27
|
+
# meaning that we can quit in the middle of a zip file
|
27
28
|
end
|
28
29
|
watched_file.listener.eof
|
29
30
|
rescue ZipException => e
|
30
31
|
logger.error("Cannot decompress the gzip file at path: #{watched_file.path}")
|
31
32
|
watched_file.listener.error
|
32
33
|
else
|
33
|
-
|
34
|
-
sincedb_collection.
|
34
|
+
watched_file.update_bytes_read(watched_file.last_stat_size)
|
35
|
+
sincedb_collection.unset_watched_file(watched_file)
|
35
36
|
watched_file.listener.deleted
|
36
37
|
watched_file.unwatch
|
37
38
|
ensure
|
@@ -25,8 +25,10 @@ module FileWatch module ReadMode
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def initialize_handlers(sincedb_collection, observer)
|
28
|
-
|
29
|
-
|
28
|
+
# we deviate from the tail mode handler initialization here
|
29
|
+
# by adding a reference to self so we can read the quit flag during a (depth first) read loop
|
30
|
+
@read_file = Handlers::ReadFile.new(self, sincedb_collection, observer, @settings)
|
31
|
+
@read_zip_file = Handlers::ReadZipFile.new(self, sincedb_collection, observer, @settings)
|
30
32
|
end
|
31
33
|
|
32
34
|
def read_file(watched_file)
|
@@ -20,14 +20,21 @@ module FileWatch
|
|
20
20
|
@write_method = LogStash::Environment.windows? || @path.chardev? || @path.blockdev? ? method(:non_atomic_write) : method(:atomic_write)
|
21
21
|
@full_path = @path.to_path
|
22
22
|
FileUtils.touch(@full_path)
|
23
|
+
@write_requested = false
|
24
|
+
end
|
25
|
+
|
26
|
+
def write_requested?
|
27
|
+
@write_requested
|
23
28
|
end
|
24
29
|
|
25
30
|
def request_disk_flush
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
@write_requested = true
|
32
|
+
flush_at_interval
|
33
|
+
end
|
34
|
+
|
35
|
+
def write_if_requested
|
36
|
+
if write_requested?
|
37
|
+
flush_at_interval
|
31
38
|
end
|
32
39
|
end
|
33
40
|
|
@@ -51,7 +58,6 @@ module FileWatch
|
|
51
58
|
#No existing sincedb to load
|
52
59
|
logger.debug("open: error: #{path}: #{e.inspect}")
|
53
60
|
end
|
54
|
-
|
55
61
|
end
|
56
62
|
|
57
63
|
def associate(watched_file)
|
@@ -130,10 +136,6 @@ module FileWatch
|
|
130
136
|
@sincedb[key].update_position(0)
|
131
137
|
end
|
132
138
|
|
133
|
-
def store_last_read(key, last_read)
|
134
|
-
@sincedb[key].update_position(last_read)
|
135
|
-
end
|
136
|
-
|
137
139
|
def increment(key, amount)
|
138
140
|
@sincedb[key].increment_position(amount)
|
139
141
|
end
|
@@ -167,6 +169,15 @@ module FileWatch
|
|
167
169
|
|
168
170
|
private
|
169
171
|
|
172
|
+
def flush_at_interval
|
173
|
+
now = Time.now.to_i
|
174
|
+
delta = now - @sincedb_last_write
|
175
|
+
if delta >= @settings.sincedb_write_interval
|
176
|
+
logger.debug("writing sincedb (delta since last write = #{delta})")
|
177
|
+
sincedb_write(now)
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
170
181
|
def handle_association(sincedb_value, watched_file)
|
171
182
|
watched_file.update_bytes_read(sincedb_value.position)
|
172
183
|
sincedb_value.set_watched_file(watched_file)
|
@@ -193,6 +204,7 @@ module FileWatch
|
|
193
204
|
logger.debug("sincedb_write: cleaned", "key" => "'#{key}'")
|
194
205
|
end
|
195
206
|
@sincedb_last_write = time
|
207
|
+
@write_requested = false
|
196
208
|
rescue Errno::EACCES
|
197
209
|
# no file handles free perhaps
|
198
210
|
# maybe it will work next time
|
data/lib/filewatch/watch.rb
CHANGED
Binary file
|
data/lib/logstash/inputs/file.rb
CHANGED
@@ -11,6 +11,7 @@ require_relative "file/patch"
|
|
11
11
|
require_relative "file_listener"
|
12
12
|
require_relative "delete_completed_file_handler"
|
13
13
|
require_relative "log_completed_file_handler"
|
14
|
+
require_relative "friendly_durations"
|
14
15
|
require "filewatch/bootstrap"
|
15
16
|
|
16
17
|
# Stream events from files, normally by tailing them in a manner
|
@@ -109,7 +110,7 @@ class File < LogStash::Inputs::Base
|
|
109
110
|
# How often (in seconds) we stat files to see if they have been modified.
|
110
111
|
# Increasing this interval will decrease the number of system calls we make,
|
111
112
|
# but increase the time to detect new log lines.
|
112
|
-
config :stat_interval, :validate =>
|
113
|
+
config :stat_interval, :validate => [FriendlyDurations, "seconds"], :default => 1
|
113
114
|
|
114
115
|
# How often (in seconds) we expand the filename patterns in the
|
115
116
|
# `path` option to discover new files to watch.
|
@@ -123,7 +124,7 @@ class File < LogStash::Inputs::Base
|
|
123
124
|
|
124
125
|
# How often (in seconds) to write a since database with the current position of
|
125
126
|
# monitored log files.
|
126
|
-
config :sincedb_write_interval, :validate =>
|
127
|
+
config :sincedb_write_interval, :validate => [FriendlyDurations, "seconds"], :default => 15
|
127
128
|
|
128
129
|
# Choose where Logstash starts initially reading files: at the beginning or
|
129
130
|
# at the end. The default behavior treats files like live streams and thus
|
@@ -145,7 +146,7 @@ class File < LogStash::Inputs::Base
|
|
145
146
|
# After its discovery, if an ignored file is modified it is no
|
146
147
|
# longer ignored and any new data is read. By default, this option is
|
147
148
|
# disabled. Note this unit is in seconds.
|
148
|
-
config :ignore_older, :validate =>
|
149
|
+
config :ignore_older, :validate => [FriendlyDurations, "seconds"]
|
149
150
|
|
150
151
|
# The file input closes any files that were last read the specified
|
151
152
|
# timespan in seconds ago.
|
@@ -154,7 +155,7 @@ class File < LogStash::Inputs::Base
|
|
154
155
|
# reopening when new data is detected. If reading, the file will be closed
|
155
156
|
# after closed_older seconds from when the last bytes were read.
|
156
157
|
# The default is 1 hour
|
157
|
-
config :close_older, :validate =>
|
158
|
+
config :close_older, :validate => [FriendlyDurations, "seconds"], :default => "1 hour"
|
158
159
|
|
159
160
|
# What is the maximum number of file_handles that this input consumes
|
160
161
|
# at any one time. Use close_older to close some files if you need to
|
@@ -191,7 +192,7 @@ class File < LogStash::Inputs::Base
|
|
191
192
|
# If no changes are detected in tracked files in the last N days their sincedb
|
192
193
|
# tracking record will expire and not be persisted.
|
193
194
|
# This option protects against the well known inode recycling problem. (add reference)
|
194
|
-
config :sincedb_clean_after, :validate =>
|
195
|
+
config :sincedb_clean_after, :validate => [FriendlyDurations, "days"], :default => "14 days" # days
|
195
196
|
|
196
197
|
# File content is read off disk in blocks or chunks, then using whatever the set delimiter
|
197
198
|
# is, lines are extracted from the chunk. Specify the size in bytes of each chunk.
|
@@ -222,6 +223,20 @@ class File < LogStash::Inputs::Base
|
|
222
223
|
config :file_sort_direction, :validate => ["asc", "desc"], :default => "asc"
|
223
224
|
|
224
225
|
public
|
226
|
+
|
227
|
+
class << self
|
228
|
+
alias_method :old_validate_value, :validate_value
|
229
|
+
|
230
|
+
def validate_value(value, validator)
|
231
|
+
if validator.is_a?(Array) && validator.size == 2 && validator.first.respond_to?(:call)
|
232
|
+
callable, units = *validator
|
233
|
+
# returns a ValidatedStruct having a `to_a` method suitable to return to the config mixin caller
|
234
|
+
return callable.call(value, units).to_a
|
235
|
+
end
|
236
|
+
old_validate_value(value, validator)
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
225
240
|
def register
|
226
241
|
require "addressable/uri"
|
227
242
|
require "digest/md5"
|
@@ -316,6 +331,7 @@ class File < LogStash::Inputs::Base
|
|
316
331
|
start_processing
|
317
332
|
@queue = queue
|
318
333
|
@watcher.subscribe(self) # halts here until quit is called
|
334
|
+
# last action of the subscribe call is to write the sincedb
|
319
335
|
exit_flush
|
320
336
|
end # def run
|
321
337
|
|
@@ -338,9 +354,6 @@ class File < LogStash::Inputs::Base
|
|
338
354
|
end
|
339
355
|
|
340
356
|
def stop
|
341
|
-
# in filewatch >= 0.6.7, quit will closes and forget all files
|
342
|
-
# but it will write their last read positions to since_db
|
343
|
-
# beforehand
|
344
357
|
if @watcher
|
345
358
|
@codec.close
|
346
359
|
@watcher.quit
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module LogStash module Inputs
|
4
|
+
module FriendlyDurations
|
5
|
+
NUMBERS_RE = /^(?<number>\d+(\.\d+)?)\s?(?<units>s((ec)?(ond)?)(s)?|m((in)?(ute)?)(s)?|h(our)?(s)?|d(ay)?(s)?|w(eek)?(s)?|us(ec)?(s)?|ms(ec)?(s)?)?$/
|
6
|
+
HOURS = 3600
|
7
|
+
DAYS = 24 * HOURS
|
8
|
+
MEGA = 10**6
|
9
|
+
KILO = 10**3
|
10
|
+
|
11
|
+
ValidatedStruct = Struct.new(:value, :error_message) do
|
12
|
+
def to_a
|
13
|
+
error_message.nil? ? [true, value] : [false, error_message]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.call(value, unit = "sec")
|
18
|
+
# coerce into seconds
|
19
|
+
val_string = value.to_s.strip
|
20
|
+
matched = NUMBERS_RE.match(val_string)
|
21
|
+
if matched.nil?
|
22
|
+
failed_message = "Value '#{val_string}' is not a valid duration string e.g. 200 usec, 250ms, 60 sec, 18h, 21.5d, 1 day, 2w, 6 weeks"
|
23
|
+
return ValidatedStruct.new(nil, failed_message)
|
24
|
+
end
|
25
|
+
multiplier = matched[:units] || unit
|
26
|
+
numeric = matched[:number].to_f
|
27
|
+
case multiplier
|
28
|
+
when "m","min","mins","minute","minutes"
|
29
|
+
ValidatedStruct.new(numeric * 60, nil)
|
30
|
+
when "h","hour","hours"
|
31
|
+
ValidatedStruct.new(numeric * HOURS, nil)
|
32
|
+
when "d","day","days"
|
33
|
+
ValidatedStruct.new(numeric * DAYS, nil)
|
34
|
+
when "w","week","weeks"
|
35
|
+
ValidatedStruct.new(numeric * 7 * DAYS, nil)
|
36
|
+
when "ms","msec","msecs"
|
37
|
+
ValidatedStruct.new(numeric / KILO, nil)
|
38
|
+
when "us","usec","usecs"
|
39
|
+
ValidatedStruct.new(numeric / MEGA, nil)
|
40
|
+
else
|
41
|
+
ValidatedStruct.new(numeric, nil)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end end
|
data/logstash-input-file.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-input-file'
|
4
|
-
s.version = '4.1.
|
4
|
+
s.version = '4.1.3'
|
5
5
|
s.licenses = ['Apache-2.0']
|
6
6
|
s.summary = "Streams events from files"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'spec_helper'
|
3
|
+
|
4
|
+
module FileWatch
|
5
|
+
describe ReadMode::Handlers::ReadFile do
|
6
|
+
let(:settings) do
|
7
|
+
Settings.from_options(
|
8
|
+
:sincedb_write_interval => 0,
|
9
|
+
:sincedb_path => File::NULL
|
10
|
+
)
|
11
|
+
end
|
12
|
+
let(:sdb_collection) { SincedbCollection.new(settings) }
|
13
|
+
let(:directory) { Pathname.new(FIXTURE_DIR) }
|
14
|
+
let(:pathname) { directory.join('uncompressed.log') }
|
15
|
+
let(:watched_file) { WatchedFile.new(pathname, pathname.stat, settings) }
|
16
|
+
let(:processor) { ReadMode::Processor.new(settings).add_watch(watch) }
|
17
|
+
let(:file) { DummyFileReader.new(settings.file_chunk_size, 2) }
|
18
|
+
|
19
|
+
context "simulate reading a 64KB file with a default chunk size of 32KB and a zero sincedb write interval" do
|
20
|
+
let(:watch) { double("watch", :quit? => false) }
|
21
|
+
it "calls 'sincedb_write' exactly 2 times" do
|
22
|
+
allow(FileOpener).to receive(:open).with(watched_file.path).and_return(file)
|
23
|
+
expect(sdb_collection).to receive(:sincedb_write).exactly(2).times
|
24
|
+
watched_file.activate
|
25
|
+
processor.initialize_handlers(sdb_collection, TestObserver.new)
|
26
|
+
processor.read_file(watched_file)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
context "simulate reading a 64KB file with a default chunk size of 32KB and a zero sincedb write interval" do
|
31
|
+
let(:watch) { double("watch", :quit? => true) }
|
32
|
+
it "calls 'sincedb_write' exactly 0 times as shutdown is in progress" do
|
33
|
+
expect(sdb_collection).to receive(:sincedb_write).exactly(0).times
|
34
|
+
watched_file.activate
|
35
|
+
processor.initialize_handlers(sdb_collection, TestObserver.new)
|
36
|
+
processor.read_file(watched_file)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -28,6 +28,32 @@ require 'filewatch/bootstrap'
|
|
28
28
|
|
29
29
|
module FileWatch
|
30
30
|
|
31
|
+
class DummyFileReader
|
32
|
+
def initialize(read_size, iterations)
|
33
|
+
@read_size = read_size
|
34
|
+
@iterations = iterations
|
35
|
+
@closed = false
|
36
|
+
@accumulated = 0
|
37
|
+
end
|
38
|
+
def file_seek(*)
|
39
|
+
end
|
40
|
+
def close()
|
41
|
+
@closed = true
|
42
|
+
end
|
43
|
+
def closed?
|
44
|
+
@closed
|
45
|
+
end
|
46
|
+
def sysread(amount)
|
47
|
+
@accumulated += amount
|
48
|
+
if @accumulated > @read_size * @iterations
|
49
|
+
raise EOFError.new
|
50
|
+
end
|
51
|
+
string = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcde\n"
|
52
|
+
multiplier = amount / string.length
|
53
|
+
string * multiplier
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
31
57
|
FIXTURE_DIR = File.join('spec', 'fixtures')
|
32
58
|
|
33
59
|
def self.make_file_older(path, seconds)
|
@@ -76,7 +76,7 @@ module FileWatch
|
|
76
76
|
|
77
77
|
context "when close_older is set" do
|
78
78
|
let(:wait_before_quit) { 0.8 }
|
79
|
-
let(:opts) { super.merge(:close_older => 0.
|
79
|
+
let(:opts) { super.merge(:close_older => 0.15, :max_active => 1, :stat_interval => 0.1) }
|
80
80
|
it "opens both files" do
|
81
81
|
actions.activate
|
82
82
|
tailing.watch_this(watch_dir)
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "helpers/spec_helper"
|
4
|
+
require "logstash/inputs/friendly_durations"
|
5
|
+
|
6
|
+
describe "FriendlyDurations module function call" do
|
7
|
+
context "unacceptable strings" do
|
8
|
+
it "gives an error message for 'foobar'" do
|
9
|
+
result = LogStash::Inputs::FriendlyDurations.call("foobar","sec")
|
10
|
+
expect(result.error_message).to start_with("Value 'foobar' is not a valid duration string e.g. 200 usec")
|
11
|
+
end
|
12
|
+
it "gives an error message for '5 5 days'" do
|
13
|
+
result = LogStash::Inputs::FriendlyDurations.call("5 5 days","sec")
|
14
|
+
expect(result.error_message).to start_with("Value '5 5 days' is not a valid duration string e.g. 200 usec")
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
context "when a unit is not specified, a unit override will affect the result" do
|
19
|
+
it "coerces 14 to 1209600.0s as days" do
|
20
|
+
result = LogStash::Inputs::FriendlyDurations.call(14,"d")
|
21
|
+
expect(result.error_message).to eq(nil)
|
22
|
+
expect(result.value).to eq(1209600.0)
|
23
|
+
end
|
24
|
+
it "coerces '30' to 1800.0s as minutes" do
|
25
|
+
result = LogStash::Inputs::FriendlyDurations.call("30","minutes")
|
26
|
+
expect(result.to_a).to eq([true, 1800.0])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
context "acceptable strings" do
|
31
|
+
[
|
32
|
+
["10", 10.0],
|
33
|
+
["10.5 s", 10.5],
|
34
|
+
["10.75 secs", 10.75],
|
35
|
+
["11 second", 11.0],
|
36
|
+
["10 seconds", 10.0],
|
37
|
+
["500 ms", 0.5],
|
38
|
+
["750.9 msec", 0.7509],
|
39
|
+
["750.9 msecs", 0.7509],
|
40
|
+
["750.9 us", 0.0007509],
|
41
|
+
["750.9 usec", 0.0007509],
|
42
|
+
["750.9 usecs", 0.0007509],
|
43
|
+
["1.5m", 90.0],
|
44
|
+
["2.5 m", 150.0],
|
45
|
+
["1.25 min", 75.0],
|
46
|
+
["1 minute", 60.0],
|
47
|
+
["2.5 minutes", 150.0],
|
48
|
+
["2h", 7200.0],
|
49
|
+
["2 h", 7200.0],
|
50
|
+
["1 hour", 3600.0],
|
51
|
+
["1hour", 3600.0],
|
52
|
+
["3 hours", 10800.0],
|
53
|
+
["0.5d", 43200.0],
|
54
|
+
["1day", 86400.0],
|
55
|
+
["1 day", 86400.0],
|
56
|
+
["2days", 172800.0],
|
57
|
+
["14 days", 1209600.0],
|
58
|
+
["1w", 604800.0],
|
59
|
+
["1 w", 604800.0],
|
60
|
+
["1 week", 604800.0],
|
61
|
+
["2weeks", 1209600.0],
|
62
|
+
["2 weeks", 1209600.0],
|
63
|
+
["1.5 weeks", 907200.0],
|
64
|
+
].each do |input, coerced|
|
65
|
+
it "coerces #{input.inspect.rjust(16)} to #{coerced.inspect}" do
|
66
|
+
result = LogStash::Inputs::FriendlyDurations.call(input,"sec")
|
67
|
+
expect(result.to_a).to eq([true, coerced])
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-input-file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.
|
4
|
+
version: 4.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -204,9 +204,11 @@ files:
|
|
204
204
|
- lib/logstash/inputs/file.rb
|
205
205
|
- lib/logstash/inputs/file/patch.rb
|
206
206
|
- lib/logstash/inputs/file_listener.rb
|
207
|
+
- lib/logstash/inputs/friendly_durations.rb
|
207
208
|
- lib/logstash/inputs/log_completed_file_handler.rb
|
208
209
|
- logstash-input-file.gemspec
|
209
210
|
- spec/filewatch/buftok_spec.rb
|
211
|
+
- spec/filewatch/read_mode_handlers_read_file_spec.rb
|
210
212
|
- spec/filewatch/reading_spec.rb
|
211
213
|
- spec/filewatch/sincedb_record_serializer_spec.rb
|
212
214
|
- spec/filewatch/spec_helper.rb
|
@@ -222,6 +224,7 @@ files:
|
|
222
224
|
- spec/helpers/spec_helper.rb
|
223
225
|
- spec/inputs/file_read_spec.rb
|
224
226
|
- spec/inputs/file_tail_spec.rb
|
227
|
+
- spec/inputs/friendly_durations_spec.rb
|
225
228
|
homepage: http://www.elastic.co/guide/en/logstash/current/index.html
|
226
229
|
licenses:
|
227
230
|
- Apache-2.0
|
@@ -250,6 +253,7 @@ specification_version: 4
|
|
250
253
|
summary: Streams events from files
|
251
254
|
test_files:
|
252
255
|
- spec/filewatch/buftok_spec.rb
|
256
|
+
- spec/filewatch/read_mode_handlers_read_file_spec.rb
|
253
257
|
- spec/filewatch/reading_spec.rb
|
254
258
|
- spec/filewatch/sincedb_record_serializer_spec.rb
|
255
259
|
- spec/filewatch/spec_helper.rb
|
@@ -265,3 +269,4 @@ test_files:
|
|
265
269
|
- spec/helpers/spec_helper.rb
|
266
270
|
- spec/inputs/file_read_spec.rb
|
267
271
|
- spec/inputs/file_tail_spec.rb
|
272
|
+
- spec/inputs/friendly_durations_spec.rb
|