logstash-input-file 4.0.5 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -3
- data/JAR_VERSION +1 -0
- data/docs/index.asciidoc +195 -37
- data/lib/filewatch/bootstrap.rb +74 -0
- data/lib/filewatch/discoverer.rb +94 -0
- data/lib/filewatch/helper.rb +65 -0
- data/lib/filewatch/observing_base.rb +97 -0
- data/lib/filewatch/observing_read.rb +23 -0
- data/lib/filewatch/observing_tail.rb +22 -0
- data/lib/filewatch/read_mode/handlers/base.rb +81 -0
- data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
- data/lib/filewatch/read_mode/processor.rb +117 -0
- data/lib/filewatch/settings.rb +67 -0
- data/lib/filewatch/sincedb_collection.rb +215 -0
- data/lib/filewatch/sincedb_record_serializer.rb +70 -0
- data/lib/filewatch/sincedb_value.rb +87 -0
- data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
- data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
- data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
- data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
- data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
- data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
- data/lib/filewatch/tail_mode/processor.rb +209 -0
- data/lib/filewatch/watch.rb +107 -0
- data/lib/filewatch/watched_file.rb +226 -0
- data/lib/filewatch/watched_files_collection.rb +84 -0
- data/lib/filewatch/winhelper.rb +65 -0
- data/lib/jars/filewatch-1.0.0.jar +0 -0
- data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
- data/lib/logstash/inputs/file.rb +162 -107
- data/lib/logstash/inputs/file_listener.rb +61 -0
- data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
- data/logstash-input-file.gemspec +5 -4
- data/spec/filewatch/buftok_spec.rb +24 -0
- data/spec/filewatch/reading_spec.rb +128 -0
- data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
- data/spec/filewatch/spec_helper.rb +120 -0
- data/spec/filewatch/tailing_spec.rb +440 -0
- data/spec/filewatch/watched_file_spec.rb +38 -0
- data/spec/filewatch/watched_files_collection_spec.rb +73 -0
- data/spec/filewatch/winhelper_spec.rb +22 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/no-final-newline.log +2 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
- data/spec/inputs/file_read_spec.rb +155 -0
- data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
- metadata +96 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7dc40f218316d0466dc4e5195095a197f08a079e2ecdf7f8f6c24bb86bf1fa1
|
4
|
+
data.tar.gz: e44ef70fc261926779276c04010ea32326988648a535d9088af691835153ebcb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fb28fc538b07df9b7a39d0cdfa47979afee706bc98b7d718bbc43080e1ab7ce49492cca594f9a07876aa52cca7db1ca81fe47243bc9500af6ab565a2a0568e1
|
7
|
+
data.tar.gz: e13d85b4050710ba6fcd7492bf961ef4b6814bb4ba723642d3b96ae56a3e6291724ce893984cfb62e64b2c7225263ef1d49ed656f676afd48be1cf47df60a59a
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
## 4.1.0
|
2
|
+
- Move Filewatch code into the plugin folder, rework Filewatch code to use
|
3
|
+
Logstash facilities like logging and environment.
|
4
|
+
- New feature: `mode` setting. Introduces two modes, `tail` mode is the
|
5
|
+
existing behaviour for tailing, `read` mode is new behaviour that is
|
6
|
+
optimized for the read complete content scenario. Please read the docs to
|
7
|
+
fully appreciate the benefits of `read` mode.
|
8
|
+
- New feature: File completion actions. Settings `file_completed_action`
|
9
|
+
and `file_completed_log_path` control what actions to do after a file is
|
10
|
+
completely read. Applicable: `read` mode only.
|
11
|
+
- New feature: in `read` mode, compressed files can be processed, GZIP only.
|
12
|
+
- New feature: Files are sorted after being discovered. Settings `file_sort_by`
|
13
|
+
and `file_sort_direction` control the sort order. Applicable: any mode.
|
14
|
+
- New feature: Banded or striped file processing. Settings: `file_chunk_size`
|
15
|
+
and `file_chunk_count` control banded or striped processing. Applicable: any mode.
|
16
|
+
- New feature: `sincedb_clean_after` setting. Introduces expiry of sincedb
|
17
|
+
records. The default is 14 days. If, after `sincedb_clean_after` days, no
|
18
|
+
activity has been detected on a file (inode) the record expires and is not
|
19
|
+
written to disk. The persisted record now includes the "last activity seen"
|
20
|
+
timestamp. Applicable: any mode.
|
21
|
+
- Docs: extensive additions to introduce the new features.
|
22
|
+
|
1
23
|
## 4.0.5
|
2
24
|
- Docs: Set the default_codec doc attribute.
|
3
25
|
|
@@ -12,9 +34,9 @@
|
|
12
34
|
- Fix an issue with the rspec suite not finding log4j
|
13
35
|
|
14
36
|
## 4.0.0
|
15
|
-
- Breaking: `ignore_older` settings is disabled by default. Previously if the file was older than
|
16
|
-
24 hours (the default for ignore_older), it would be ignored. This confused new users a lot, specially
|
17
|
-
when they were reading new files with Logstash (with `start_position => beginning`). This setting also
|
37
|
+
- Breaking: `ignore_older` settings is disabled by default. Previously if the file was older than
|
38
|
+
24 hours (the default for ignore_older), it would be ignored. This confused new users a lot, specially
|
39
|
+
when they were reading new files with Logstash (with `start_position => beginning`). This setting also
|
18
40
|
makes it consistent with Filebeat.
|
19
41
|
|
20
42
|
## 3.1.2
|
data/JAR_VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.0
|
data/docs/index.asciidoc
CHANGED
@@ -25,21 +25,63 @@ Stream events from files, normally by tailing them in a manner
|
|
25
25
|
similar to `tail -0F` but optionally reading them from the
|
26
26
|
beginning.
|
27
27
|
|
28
|
-
By default, each event is assumed to be one line and a line is
|
29
|
-
taken to be the text before a newline character.
|
30
28
|
Normally, logging will add a newline to the end of each line written.
|
29
|
+
By default, each event is assumed to be one line
|
30
|
+
and a line is taken to be the text before a newline character.
|
31
31
|
If you would like to join multiple log lines into one event,
|
32
|
-
you'll want to use the multiline codec
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
32
|
+
you'll want to use the multiline codec.
|
33
|
+
The plugin loops between discovering new files and processing
|
34
|
+
each discovered file. Discovered files have a lifecycle, they start off
|
35
|
+
in the "watched" or "ignored" state. Other states in the lifecycle are:
|
36
|
+
"active", "closed" and "unwatched"
|
37
|
+
|
38
|
+
By default, a window of 4095 files is used to limit the number of file handles in use.
|
39
|
+
The processing phase has a number of stages:
|
40
|
+
|
41
|
+
* Checks whether "closed" or "ignored" files have changed in size since last time and
|
42
|
+
if so puts them in the "watched" state.
|
43
|
+
* Selects enough "watched" files to fill the available space in the window, these files
|
44
|
+
are made "active".
|
45
|
+
* The active files are opened and read, each file is read from the last known position
|
46
|
+
to the end of current content (EOF) by default.
|
47
|
+
|
48
|
+
In some cases it is useful to be able to control which files are read first, sorting,
|
49
|
+
and whether files are read completely or banded/striped.
|
50
|
+
Complete reading is *all of* file A then file B then file C and so on.
|
51
|
+
Banded or striped reading is *some of* file A then file B then file C and so on looping around
|
52
|
+
to file A again until all files are read. Banded reading is specified by changing
|
53
|
+
<<plugins-{type}s-{plugin}-file_chunk_count>> and perhaps <<plugins-{type}s-{plugin}-file_chunk_size>>.
|
54
|
+
Banding and sorting may be useful if you want some events from all files to appear
|
55
|
+
in Kibana as early as possible.
|
56
|
+
|
57
|
+
The plugin has two modes of operation, Tail mode and Read mode.
|
58
|
+
|
59
|
+
===== Tail mode
|
60
|
+
|
61
|
+
In this mode the plugin aims to track changing files and emit new content as it's
|
62
|
+
appended to each file. In this mode, files are seen as a never ending stream of
|
63
|
+
content and EOF has no special significance. The plugin always assumes that
|
64
|
+
there will be more content. When files are rotated, the smaller or zero size is
|
65
|
+
detected, the current position is reset to zero and streaming continues.
|
66
|
+
A delimiter must be seen before the accumulated characters can be emitted as a line.
|
67
|
+
|
68
|
+
===== Read mode
|
69
|
+
|
70
|
+
In this mode the plugin treats each file as if it is content complete, that is,
|
71
|
+
a finite stream of lines and now EOF is significant. A last delimiter is not
|
72
|
+
needed because EOF means that the accumulated characters can be emitted as a line.
|
73
|
+
Further, EOF here means that the file can be closed and put in the "unwatched"
|
74
|
+
state - this automatically frees up space in the active window. This mode also
|
75
|
+
makes it possible to process compressed files as they are content complete.
|
76
|
+
Read mode also allows for an action to take place after processing the file completely.
|
77
|
+
|
78
|
+
In the past attempts to simulate a Read mode while still assuming infinite streams
|
79
|
+
were not ideal and a dedicated Read mode is an improvement.
|
38
80
|
|
39
81
|
==== Reading from remote network volumes
|
40
82
|
|
41
|
-
The file input is not tested on remote filesystems such as NFS, Samba, s3fs-fuse, etc. These
|
42
|
-
remote filesystems typically have behaviors that are very different from local filesystems and
|
83
|
+
The file input is not tested on remote filesystems such as NFS, Samba, s3fs-fuse, etc. These
|
84
|
+
remote filesystems typically have behaviors that are very different from local filesystems and
|
43
85
|
are therefore unlikely to work correctly when used with the file input.
|
44
86
|
|
45
87
|
==== Tracking of current position in watched files
|
@@ -50,29 +92,38 @@ possible to stop and restart Logstash and have it pick up where it
|
|
50
92
|
left off without missing the lines that were added to the file while
|
51
93
|
Logstash was stopped.
|
52
94
|
|
53
|
-
By default, the sincedb file is placed in the
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
a sincedb path with the `sincedb_path` option.
|
95
|
+
By default, the sincedb file is placed in the data directory of Logstash
|
96
|
+
with a filename based on the filename patterns being watched (i.e. the `path` option).
|
97
|
+
Thus, changing the filename patterns will result in a new sincedb file being used and
|
98
|
+
any existing current position state will be lost. If you change your patterns
|
99
|
+
with any frequency it might make sense to explicitly choose a sincedb path
|
100
|
+
with the `sincedb_path` option.
|
60
101
|
|
61
102
|
A different `sincedb_path` must be used for each input. Using the same
|
62
103
|
path will cause issues. The read checkpoints for each input must be
|
63
104
|
stored in a different path so the information does not override.
|
64
105
|
|
65
|
-
Sincedb
|
106
|
+
Sincedb records can now be expired meaning that read positions of older files
|
107
|
+
will not be remembered after a certain time period. File systems may need to reuse
|
108
|
+
inodes for new content. Ideally, we would not use the read position of old content,
|
109
|
+
but we have no reliable way to detect that inode reuse has occurred. This is more
|
110
|
+
relevant to Read mode where a great many files are tracked in the sincedb.
|
111
|
+
Bear in mind though, if a record has expired, a previously seen file will be read again.
|
112
|
+
|
113
|
+
Sincedb files are text files with four (< v5.0.0), five or six columns:
|
66
114
|
|
67
115
|
. The inode number (or equivalent).
|
68
116
|
. The major device number of the file system (or equivalent).
|
69
117
|
. The minor device number of the file system (or equivalent).
|
70
118
|
. The current byte offset within the file.
|
119
|
+
. The last active timestamp (a floating point number).
|
120
|
+
. The last known path that this record was matched to (for
|
121
|
+
old sincedb records converted to the new format, this is blank).
|
71
122
|
|
72
123
|
On non-Windows systems you can obtain the inode number of a file
|
73
124
|
with e.g. `ls -li`.
|
74
125
|
|
75
|
-
==== File rotation
|
126
|
+
==== File rotation in Tail mode
|
76
127
|
|
77
128
|
File rotation is detected and handled by this input, regardless of
|
78
129
|
whether the file is rotated via a rename or a copy operation. To
|
@@ -102,9 +153,17 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
102
153
|
| <<plugins-{type}s-{plugin}-delimiter>> |<<string,string>>|No
|
103
154
|
| <<plugins-{type}s-{plugin}-discover_interval>> |<<number,number>>|No
|
104
155
|
| <<plugins-{type}s-{plugin}-exclude>> |<<array,array>>|No
|
156
|
+
| <<plugins-{type}s-{plugin}-file_chunk_count>> |<<number,number>>|No
|
157
|
+
| <<plugins-{type}s-{plugin}-file_chunk_size>> |<<number,number>>|No
|
158
|
+
| <<plugins-{type}s-{plugin}-file_completed_action>> |<<string,string>>, one of `["delete", "log", "log_and_delete"]`|No
|
159
|
+
| <<plugins-{type}s-{plugin}-file_completed_log_path>> |<<string,string>>|No
|
160
|
+
| <<plugins-{type}s-{plugin}-file_sort_by>> |<<string,string>>, one of `["last_modified", "path"]`|No
|
161
|
+
| <<plugins-{type}s-{plugin}-file_sort_direction>> |<<string,string>>, one of `["asc", "desc"]`|No
|
105
162
|
| <<plugins-{type}s-{plugin}-ignore_older>> |<<number,number>>|No
|
106
163
|
| <<plugins-{type}s-{plugin}-max_open_files>> |<<number,number>>|No
|
164
|
+
| <<plugins-{type}s-{plugin}-mode>> |<<string,string>>, one of `["tail", "read"]`|No
|
107
165
|
| <<plugins-{type}s-{plugin}-path>> |<<array,array>>|Yes
|
166
|
+
| <<plugins-{type}s-{plugin}-sincedb_clean_after>> |<<number,number>>|No
|
108
167
|
| <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
|
109
168
|
| <<plugins-{type}s-{plugin}-sincedb_write_interval>> |<<number,number>>|No
|
110
169
|
| <<plugins-{type}s-{plugin}-start_position>> |<<string,string>>, one of `["beginning", "end"]`|No
|
@@ -117,7 +176,7 @@ input plugins.
|
|
117
176
|
|
118
177
|
|
119
178
|
[id="plugins-{type}s-{plugin}-close_older"]
|
120
|
-
===== `close_older`
|
179
|
+
===== `close_older`
|
121
180
|
|
122
181
|
* Value type is <<number,number>>
|
123
182
|
* Default value is `3600`
|
@@ -129,18 +188,20 @@ read. If tailing, and there is a large time gap in incoming data the file
|
|
129
188
|
can be closed (allowing other files to be opened) but will be queued for
|
130
189
|
reopening when new data is detected. If reading, the file will be closed
|
131
190
|
after `close_older` seconds from when the last bytes were read.
|
132
|
-
|
191
|
+
This setting is retained for backward compatibility if you upgrade the
|
192
|
+
plugin to 4.1.0+, are reading not tailing and do not switch to using Read mode.
|
133
193
|
|
134
194
|
[id="plugins-{type}s-{plugin}-delimiter"]
|
135
|
-
===== `delimiter`
|
195
|
+
===== `delimiter`
|
136
196
|
|
137
197
|
* Value type is <<string,string>>
|
138
198
|
* Default value is `"\n"`
|
139
199
|
|
140
|
-
set the new line delimiter, defaults to "\n"
|
200
|
+
set the new line delimiter, defaults to "\n". Note that when reading compressed files
|
201
|
+
this setting is not used, instead the standard Windows or Unix line endings are used.
|
141
202
|
|
142
203
|
[id="plugins-{type}s-{plugin}-discover_interval"]
|
143
|
-
===== `discover_interval`
|
204
|
+
===== `discover_interval`
|
144
205
|
|
145
206
|
* Value type is <<number,number>>
|
146
207
|
* Default value is `15`
|
@@ -149,7 +210,7 @@ How often (in seconds) we expand the filename patterns in the
|
|
149
210
|
`path` option to discover new files to watch.
|
150
211
|
|
151
212
|
[id="plugins-{type}s-{plugin}-exclude"]
|
152
|
-
===== `exclude`
|
213
|
+
===== `exclude`
|
153
214
|
|
154
215
|
* Value type is <<array,array>>
|
155
216
|
* There is no default value for this setting.
|
@@ -159,12 +220,79 @@ patterns are valid here, too. For example, if you have
|
|
159
220
|
[source,ruby]
|
160
221
|
path => "/var/log/*"
|
161
222
|
|
162
|
-
|
223
|
+
In Tail mode, you might want to exclude gzipped files:
|
163
224
|
[source,ruby]
|
164
225
|
exclude => "*.gz"
|
165
226
|
|
227
|
+
[id="plugins-{type}s-{plugin}-file_chunk_count"]
|
228
|
+
===== `file_chunk_count`
|
229
|
+
|
230
|
+
* Value type is <<number,number>>
|
231
|
+
* Default value is `4611686018427387903`
|
232
|
+
|
233
|
+
When combined with the `file_chunk_size`, this option sets how many chunks (bands or stripes)
|
234
|
+
are read from each file before moving to the next active file.
|
235
|
+
For example, a `file_chunk_count` of 32 and a `file_chunk_size` of 32KB will process the next 1MB from each active file.
|
236
|
+
As the default is very large, the file is effectively read to EOF before moving to the next active file.
|
237
|
+
|
238
|
+
[id="plugins-{type}s-{plugin}-file_chunk_size"]
|
239
|
+
===== `file_chunk_size`
|
240
|
+
|
241
|
+
* Value type is <<number,number>>
|
242
|
+
* Default value is `32768` (32KB)
|
243
|
+
|
244
|
+
File content is read off disk in blocks or chunks and lines are extracted from the chunk.
|
245
|
+
See <<plugins-{type}s-{plugin}-file_chunk_count>> to see why and when to change this setting
|
246
|
+
from the default.
|
247
|
+
|
248
|
+
[id="plugins-{type}s-{plugin}-file_completed_action"]
|
249
|
+
===== `file_completed_action`
|
250
|
+
|
251
|
+
* Value can be any of: `delete`, `log`, `log_and_delete`
|
252
|
+
* The default is `delete`.
|
253
|
+
|
254
|
+
When in `read` mode, what action should be carried out when a file is done with.
|
255
|
+
If 'delete' is specified then the file will be deleted. If 'log' is specified
|
256
|
+
then the full path of the file is logged to the file specified in the
|
257
|
+
`file_completed_log_path` setting. If `log_and_delete` is specified then
|
258
|
+
both above actions take place.
|
259
|
+
|
260
|
+
[id="plugins-{type}s-{plugin}-file_completed_log_path"]
|
261
|
+
===== `file_completed_log_path`
|
262
|
+
|
263
|
+
* Value type is <<string,string>>
|
264
|
+
* There is no default value for this setting.
|
265
|
+
|
266
|
+
Which file should the completely read file paths be appended to. Only specify
|
267
|
+
this path to a file when `file_completed_action` is 'log' or 'log_and_delete'.
|
268
|
+
IMPORTANT: this file is appended to only - it could become very large. You are
|
269
|
+
responsible for file rotation.
|
270
|
+
|
271
|
+
[id="plugins-{type}s-{plugin}-file_sort_by"]
|
272
|
+
===== `file_sort_by`
|
273
|
+
|
274
|
+
* Value can be any of: `last_modified`, `path`
|
275
|
+
* The default is `last_modified`.
|
276
|
+
|
277
|
+
Which attribute of a "watched" file should be used to sort them by.
|
278
|
+
Files can be sorted by last modified date or alphabetically by full path.
|
279
|
+
Previously the processing order of the discovered and therefore
|
280
|
+
"watched" files was OS dependent.
|
281
|
+
|
282
|
+
[id="plugins-{type}s-{plugin}-file_sort_direction"]
|
283
|
+
===== `file_sort_direction`
|
284
|
+
|
285
|
+
* Value can be any of: `asc`, `desc`
|
286
|
+
* The default is `asc`.
|
287
|
+
|
288
|
+
Select between ascending and descending order when sorting "watched" files.
|
289
|
+
If oldest data first is important then the defaults of `last_modified` + `asc` are good.
|
290
|
+
If newest data first is more important then opt for `last_modified` + `desc`.
|
291
|
+
If you use special naming conventions for the file full paths then perhaps
|
292
|
+
`path` + `asc` will help to control the order of file processing.
|
293
|
+
|
166
294
|
[id="plugins-{type}s-{plugin}-ignore_older"]
|
167
|
-
===== `ignore_older`
|
295
|
+
===== `ignore_older`
|
168
296
|
|
169
297
|
* Value type is <<number,number>>
|
170
298
|
* There is no default value for this setting.
|
@@ -176,7 +304,7 @@ longer ignored and any new data is read. By default, this option is
|
|
176
304
|
disabled. Note this unit is in seconds.
|
177
305
|
|
178
306
|
[id="plugins-{type}s-{plugin}-max_open_files"]
|
179
|
-
===== `max_open_files`
|
307
|
+
===== `max_open_files`
|
180
308
|
|
181
309
|
* Value type is <<number,number>>
|
182
310
|
* There is no default value for this setting.
|
@@ -186,10 +314,29 @@ at any one time. Use close_older to close some files if you need to
|
|
186
314
|
process more files than this number. This should not be set to the
|
187
315
|
maximum the OS can do because file handles are needed for other
|
188
316
|
LS plugins and OS processes.
|
189
|
-
|
317
|
+
A default of 4095 is set internally.
|
318
|
+
|
319
|
+
[id="plugins-{type}s-{plugin}-mode"]
|
320
|
+
===== `mode`
|
321
|
+
|
322
|
+
* Value can be either `tail` or `read`.
|
323
|
+
* The default value is `tail`.
|
324
|
+
|
325
|
+
What mode do you want the file input to operate in. Tail a few files or
|
326
|
+
read many content-complete files. Read mode now supports gzip file processing.
|
327
|
+
If "read" is specified then the following other settings are ignored:
|
328
|
+
|
329
|
+
. `start_position` (files are always read from the beginning)
|
330
|
+
. `close_older` (files are automatically 'closed' when EOF is reached)
|
331
|
+
|
332
|
+
If "read" is specified then the following settings are heeded:
|
333
|
+
|
334
|
+
. `ignore_older` (older files are not processed)
|
335
|
+
. `file_completed_action` (what action should be taken when the file is processed)
|
336
|
+
. `file_completed_log_path` (which file should the completed file path be logged to)
|
190
337
|
|
191
338
|
[id="plugins-{type}s-{plugin}-path"]
|
192
|
-
===== `path`
|
339
|
+
===== `path`
|
193
340
|
|
194
341
|
* This is a required setting.
|
195
342
|
* Value type is <<array,array>>
|
@@ -204,9 +351,21 @@ Paths must be absolute and cannot be relative.
|
|
204
351
|
You may also configure multiple paths. See an example
|
205
352
|
on the {logstash-ref}/configuration-file-structure.html#array[Logstash configuration page].
|
206
353
|
|
354
|
+
[id="plugins-{type}s-{plugin}-sincedb_clean_after"]
|
355
|
+
===== `sincedb_clean_after`
|
356
|
+
|
357
|
+
* Value type is <<number,number>>
|
358
|
+
* The default value for this setting is 14.
|
359
|
+
* This unit is in *days* and can be decimal e.g. 0.5 is 12 hours.
|
360
|
+
|
361
|
+
The sincedb record now has a last active timestamp associated with it.
|
362
|
+
If no changes are detected in a tracked file in the last N days its sincedb
|
363
|
+
tracking record expires and will not be persisted.
|
364
|
+
This option helps protect against the inode recycling problem.
|
365
|
+
Filebeat has a {filebeat-ref}/faq.html#inode-reuse-issue[FAQ about inode recycling].
|
207
366
|
|
208
367
|
[id="plugins-{type}s-{plugin}-sincedb_path"]
|
209
|
-
===== `sincedb_path`
|
368
|
+
===== `sincedb_path`
|
210
369
|
|
211
370
|
* Value type is <<string,string>>
|
212
371
|
* There is no default value for this setting.
|
@@ -217,7 +376,7 @@ The default will write sincedb files to `<path.data>/plugins/inputs/file`
|
|
217
376
|
NOTE: it must be a file path and not a directory path
|
218
377
|
|
219
378
|
[id="plugins-{type}s-{plugin}-sincedb_write_interval"]
|
220
|
-
===== `sincedb_write_interval`
|
379
|
+
===== `sincedb_write_interval`
|
221
380
|
|
222
381
|
* Value type is <<number,number>>
|
223
382
|
* Default value is `15`
|
@@ -226,7 +385,7 @@ How often (in seconds) to write a since database with the current position of
|
|
226
385
|
monitored log files.
|
227
386
|
|
228
387
|
[id="plugins-{type}s-{plugin}-start_position"]
|
229
|
-
===== `start_position`
|
388
|
+
===== `start_position`
|
230
389
|
|
231
390
|
* Value can be any of: `beginning`, `end`
|
232
391
|
* Default value is `"end"`
|
@@ -243,7 +402,7 @@ has already been seen before, this option has no effect and the
|
|
243
402
|
position recorded in the sincedb file will be used.
|
244
403
|
|
245
404
|
[id="plugins-{type}s-{plugin}-stat_interval"]
|
246
|
-
===== `stat_interval`
|
405
|
+
===== `stat_interval`
|
247
406
|
|
248
407
|
* Value type is <<number,number>>
|
249
408
|
* Default value is `1`
|
@@ -252,9 +411,8 @@ How often (in seconds) we stat files to see if they have been modified.
|
|
252
411
|
Increasing this interval will decrease the number of system calls we make,
|
253
412
|
but increase the time to detect new log lines.
|
254
413
|
|
255
|
-
|
256
|
-
|
257
414
|
[id="plugins-{type}s-{plugin}-common-options"]
|
258
415
|
include::{include_path}/{type}.asciidoc[]
|
259
416
|
|
260
|
-
:default_codec!:
|
417
|
+
:default_codec!:
|
418
|
+
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "rbconfig"
|
3
|
+
require "pathname"
|
4
|
+
# require "logstash/environment"
|
5
|
+
|
6
|
+
## Common setup
|
7
|
+
# all the required constants and files
|
8
|
+
# defined in one place
|
9
|
+
module FileWatch
|
10
|
+
# the number of bytes read from a file during the read phase
|
11
|
+
FILE_READ_SIZE = 32768
|
12
|
+
# the largest fixnum in ruby
|
13
|
+
# this is used in the read loop e.g.
|
14
|
+
# @opts[:file_chunk_count].times do
|
15
|
+
# where file_chunk_count defaults to this constant
|
16
|
+
FIXNUM_MAX = (2**(0.size * 8 - 2) - 1)
|
17
|
+
|
18
|
+
require_relative "helper"
|
19
|
+
|
20
|
+
module WindowsInode
|
21
|
+
def prepare_inode(path, stat)
|
22
|
+
fileId = Winhelper.GetWindowsUniqueFileIdentifier(path)
|
23
|
+
[fileId, 0, 0] # dev_* doesn't make sense on Windows
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
module UnixInode
|
28
|
+
def prepare_inode(path, stat)
|
29
|
+
[stat.ino.to_s, stat.dev_major, stat.dev_minor]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
jar_version = IO.read("JAR_VERSION").strip
|
34
|
+
|
35
|
+
require "java"
|
36
|
+
require_relative "../../lib/jars/filewatch-#{jar_version}.jar"
|
37
|
+
require "jruby_file_watch"
|
38
|
+
|
39
|
+
if LogStash::Environment.windows?
|
40
|
+
require "winhelper"
|
41
|
+
FileOpener = FileExt
|
42
|
+
InodeMixin = WindowsInode
|
43
|
+
else
|
44
|
+
FileOpener = ::File
|
45
|
+
InodeMixin = UnixInode
|
46
|
+
end
|
47
|
+
|
48
|
+
# Structs can be used as hash keys because they compare by value
|
49
|
+
# this is used as the key for values in the sincedb hash
|
50
|
+
InodeStruct = Struct.new(:inode, :maj, :min) do
|
51
|
+
def to_s
|
52
|
+
to_a.join(" ")
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class NoSinceDBPathGiven < StandardError; end
|
57
|
+
|
58
|
+
# how often (in seconds) we logger.warn a failed file open, per path.
|
59
|
+
OPEN_WARN_INTERVAL = ENV.fetch("FILEWATCH_OPEN_WARN_INTERVAL", 300).to_i
|
60
|
+
MAX_FILES_WARN_INTERVAL = ENV.fetch("FILEWATCH_MAX_FILES_WARN_INTERVAL", 20).to_i
|
61
|
+
|
62
|
+
require "logstash/util/buftok"
|
63
|
+
require_relative "settings"
|
64
|
+
require_relative "sincedb_value"
|
65
|
+
require_relative "sincedb_record_serializer"
|
66
|
+
require_relative "watched_files_collection"
|
67
|
+
require_relative "sincedb_collection"
|
68
|
+
require_relative "watch"
|
69
|
+
require_relative "watched_file"
|
70
|
+
require_relative "discoverer"
|
71
|
+
require_relative "observing_base"
|
72
|
+
require_relative "observing_tail"
|
73
|
+
require_relative "observing_read"
|
74
|
+
end
|