logstash-input-file 4.0.5 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -3
  3. data/JAR_VERSION +1 -0
  4. data/docs/index.asciidoc +195 -37
  5. data/lib/filewatch/bootstrap.rb +74 -0
  6. data/lib/filewatch/discoverer.rb +94 -0
  7. data/lib/filewatch/helper.rb +65 -0
  8. data/lib/filewatch/observing_base.rb +97 -0
  9. data/lib/filewatch/observing_read.rb +23 -0
  10. data/lib/filewatch/observing_tail.rb +22 -0
  11. data/lib/filewatch/read_mode/handlers/base.rb +81 -0
  12. data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
  13. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
  14. data/lib/filewatch/read_mode/processor.rb +117 -0
  15. data/lib/filewatch/settings.rb +67 -0
  16. data/lib/filewatch/sincedb_collection.rb +215 -0
  17. data/lib/filewatch/sincedb_record_serializer.rb +70 -0
  18. data/lib/filewatch/sincedb_value.rb +87 -0
  19. data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
  20. data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
  21. data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
  22. data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
  23. data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
  24. data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
  25. data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
  26. data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
  27. data/lib/filewatch/tail_mode/processor.rb +209 -0
  28. data/lib/filewatch/watch.rb +107 -0
  29. data/lib/filewatch/watched_file.rb +226 -0
  30. data/lib/filewatch/watched_files_collection.rb +84 -0
  31. data/lib/filewatch/winhelper.rb +65 -0
  32. data/lib/jars/filewatch-1.0.0.jar +0 -0
  33. data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
  34. data/lib/logstash/inputs/file.rb +162 -107
  35. data/lib/logstash/inputs/file_listener.rb +61 -0
  36. data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
  37. data/logstash-input-file.gemspec +5 -4
  38. data/spec/filewatch/buftok_spec.rb +24 -0
  39. data/spec/filewatch/reading_spec.rb +128 -0
  40. data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
  41. data/spec/filewatch/spec_helper.rb +120 -0
  42. data/spec/filewatch/tailing_spec.rb +440 -0
  43. data/spec/filewatch/watched_file_spec.rb +38 -0
  44. data/spec/filewatch/watched_files_collection_spec.rb +73 -0
  45. data/spec/filewatch/winhelper_spec.rb +22 -0
  46. data/spec/fixtures/compressed.log.gz +0 -0
  47. data/spec/fixtures/compressed.log.gzip +0 -0
  48. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  49. data/spec/fixtures/no-final-newline.log +2 -0
  50. data/spec/fixtures/uncompressed.log +2 -0
  51. data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
  52. data/spec/inputs/file_read_spec.rb +155 -0
  53. data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
  54. metadata +96 -28
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a350ad01d375081f28d84cf9f53e6bfc0371d65ac91ce5e88593687a6c64f3c3
4
- data.tar.gz: e614a00ea2155df21147017b478a61b88d845588413777c60ccc793007065141
3
+ metadata.gz: f7dc40f218316d0466dc4e5195095a197f08a079e2ecdf7f8f6c24bb86bf1fa1
4
+ data.tar.gz: e44ef70fc261926779276c04010ea32326988648a535d9088af691835153ebcb
5
5
  SHA512:
6
- metadata.gz: 734808f9026847742f5e01a7ba5c4c4d8d51dc4dcfdf1f6436a611f61844ccb8547b13e14ea7b5dc378dcd76614a63de65f9719569c861078ab4bdde53a99443
7
- data.tar.gz: 68912e21ad59828dcd01154d6b2aed94370408d879fe8bb3ee7385b02aaa671ab87dec463378d76501c82ea7ef4056cbe0a154caba6b7a0f3cc2edf831b487a4
6
+ metadata.gz: 6fb28fc538b07df9b7a39d0cdfa47979afee706bc98b7d718bbc43080e1ab7ce49492cca594f9a07876aa52cca7db1ca81fe47243bc9500af6ab565a2a0568e1
7
+ data.tar.gz: e13d85b4050710ba6fcd7492bf961ef4b6814bb4ba723642d3b96ae56a3e6291724ce893984cfb62e64b2c7225263ef1d49ed656f676afd48be1cf47df60a59a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,25 @@
1
+ ## 4.1.0
2
+ - Move Filewatch code into the plugin folder, rework Filewatch code to use
3
+ Logstash facilities like logging and environment.
4
+ - New feature: `mode` setting. Introduces two modes, `tail` mode is the
5
+ existing behaviour for tailing, `read` mode is new behaviour that is
6
+ optimized for the read complete content scenario. Please read the docs to
7
+ fully appreciate the benefits of `read` mode.
8
+ - New feature: File completion actions. Settings `file_completed_action`
9
+ and `file_completed_log_path` control what actions to do after a file is
10
+ completely read. Applicable: `read` mode only.
11
+ - New feature: in `read` mode, compressed files can be processed, GZIP only.
12
+ - New feature: Files are sorted after being discovered. Settings `file_sort_by`
13
+ and `file_sort_direction` control the sort order. Applicable: any mode.
14
+ - New feature: Banded or striped file processing. Settings: `file_chunk_size`
15
+ and `file_chunk_count` control banded or striped processing. Applicable: any mode.
16
+ - New feature: `sincedb_clean_after` setting. Introduces expiry of sincedb
17
+ records. The default is 14 days. If, after `sincedb_clean_after` days, no
18
+ activity has been detected on a file (inode) the record expires and is not
19
+ written to disk. The persisted record now includes the "last activity seen"
20
+ timestamp. Applicable: any mode.
21
+ - Docs: extensive additions to introduce the new features.
22
+
1
23
  ## 4.0.5
2
24
  - Docs: Set the default_codec doc attribute.
3
25
 
@@ -12,9 +34,9 @@
12
34
  - Fix an issue with the rspec suite not finding log4j
13
35
 
14
36
  ## 4.0.0
15
- - Breaking: `ignore_older` settings is disabled by default. Previously if the file was older than
16
- 24 hours (the default for ignore_older), it would be ignored. This confused new users a lot, specially
17
- when they were reading new files with Logstash (with `start_position => beginning`). This setting also
37
+ - Breaking: the `ignore_older` setting is disabled by default. Previously if the file was older than
38
+ 24 hours (the default for ignore_older), it would be ignored. This confused new users a lot, especially
39
+ when they were reading new files with Logstash (with `start_position => beginning`). This setting also
18
40
  makes it consistent with Filebeat.
19
41
 
20
42
  ## 3.1.2
data/JAR_VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
data/docs/index.asciidoc CHANGED
@@ -25,21 +25,63 @@ Stream events from files, normally by tailing them in a manner
25
25
  similar to `tail -0F` but optionally reading them from the
26
26
  beginning.
27
27
 
28
- By default, each event is assumed to be one line and a line is
29
- taken to be the text before a newline character.
30
28
  Normally, logging will add a newline to the end of each line written.
29
+ By default, each event is assumed to be one line
30
+ and a line is taken to be the text before a newline character.
31
31
  If you would like to join multiple log lines into one event,
32
- you'll want to use the multiline codec or filter.
33
-
34
- The plugin aims to track changing files and emit new content as it's
35
- appended to each file. It's not well-suited for reading a file from
36
- beginning to end and storing all of it in a single event (not even
37
- with the multiline codec or filter).
32
+ you'll want to use the multiline codec.
33
+ The plugin loops between discovering new files and processing
34
+ each discovered file. Discovered files have a lifecycle: they start off
35
+ in the "watched" or "ignored" state. Other states in the lifecycle are:
36
+ "active", "closed" and "unwatched".
37
+
38
+ By default, a window of 4095 files is used to limit the number of file handles in use.
39
+ The processing phase has a number of stages:
40
+
41
+ * Checks whether "closed" or "ignored" files have changed in size since last time and
42
+ if so puts them in the "watched" state.
43
+ * Selects enough "watched" files to fill the available space in the window, these files
44
+ are made "active".
45
+ * The active files are opened and read, each file is read from the last known position
46
+ to the end of current content (EOF) by default.
47
+
48
+ In some cases it is useful to be able to control which files are read first, sorting,
49
+ and whether files are read completely or banded/striped.
50
+ Complete reading is *all of* file A then file B then file C and so on.
51
+ Banded or striped reading is *some of* file A then file B then file C and so on looping around
52
+ to file A again until all files are read. Banded reading is specified by changing
53
+ <<plugins-{type}s-{plugin}-file_chunk_count>> and perhaps <<plugins-{type}s-{plugin}-file_chunk_size>>.
54
+ Banding and sorting may be useful if you want some events from all files to appear
55
+ in Kibana as early as possible.
56
+
57
+ The plugin has two modes of operation, Tail mode and Read mode.
58
+
59
+ ===== Tail mode
60
+
61
+ In this mode the plugin aims to track changing files and emit new content as it's
62
+ appended to each file. In this mode, files are seen as a never ending stream of
63
+ content and EOF has no special significance. The plugin always assumes that
64
+ there will be more content. When files are rotated, the smaller or zero size is
65
+ detected, the current position is reset to zero and streaming continues.
66
+ A delimiter must be seen before the accumulated characters can be emitted as a line.
67
+
68
+ ===== Read mode
69
+
70
+ In this mode the plugin treats each file as if it is content complete, that is,
71
+ a finite stream of lines and now EOF is significant. A last delimiter is not
72
+ needed because EOF means that the accumulated characters can be emitted as a line.
73
+ Further, EOF here means that the file can be closed and put in the "unwatched"
74
+ state - this automatically frees up space in the active window. This mode also
75
+ makes it possible to process compressed files as they are content complete.
76
+ Read mode also allows for an action to take place after processing the file completely.
77
+
78
+ In the past, attempts to simulate a Read mode while still assuming infinite streams
79
+ were not ideal and a dedicated Read mode is an improvement.
38
80
 
39
81
  ==== Reading from remote network volumes
40
82
 
41
- The file input is not tested on remote filesystems such as NFS, Samba, s3fs-fuse, etc. These
42
- remote filesystems typically have behaviors that are very different from local filesystems and
83
+ The file input is not tested on remote filesystems such as NFS, Samba, s3fs-fuse, etc. These
84
+ remote filesystems typically have behaviors that are very different from local filesystems and
43
85
  are therefore unlikely to work correctly when used with the file input.
44
86
 
45
87
  ==== Tracking of current position in watched files
@@ -50,29 +92,38 @@ possible to stop and restart Logstash and have it pick up where it
50
92
  left off without missing the lines that were added to the file while
51
93
  Logstash was stopped.
52
94
 
53
- By default, the sincedb file is placed in the home directory of the
54
- user running Logstash with a filename based on the filename patterns
55
- being watched (i.e. the `path` option). Thus, changing the filename
56
- patterns will result in a new sincedb file being used and any
57
- existing current position state will be lost. If you change your
58
- patterns with any frequency it might make sense to explicitly choose
59
- a sincedb path with the `sincedb_path` option.
95
+ By default, the sincedb file is placed in the data directory of Logstash
96
+ with a filename based on the filename patterns being watched (i.e. the `path` option).
97
+ Thus, changing the filename patterns will result in a new sincedb file being used and
98
+ any existing current position state will be lost. If you change your patterns
99
+ with any frequency it might make sense to explicitly choose a sincedb path
100
+ with the `sincedb_path` option.
60
101
 
61
102
  A different `sincedb_path` must be used for each input. Using the same
62
103
  path will cause issues. The read checkpoints for each input must be
63
104
  stored in a different path so the information does not override.
64
105
 
65
- Sincedb files are text files with four columns:
106
+ Sincedb records can now be expired meaning that read positions of older files
107
+ will not be remembered after a certain time period. File systems may need to reuse
108
+ inodes for new content. Ideally, we would not use the read position of old content,
109
+ but we have no reliable way to detect that inode reuse has occurred. This is more
110
+ relevant to Read mode where a great many files are tracked in the sincedb.
111
+ Bear in mind though, if a record has expired, a previously seen file will be read again.
112
+
113
+ Sincedb files are text files with four (< v5.0.0), five or six columns:
66
114
 
67
115
  . The inode number (or equivalent).
68
116
  . The major device number of the file system (or equivalent).
69
117
  . The minor device number of the file system (or equivalent).
70
118
  . The current byte offset within the file.
119
+ . The last active timestamp (a floating point number).
120
+ . The last known path that this record was matched to (for
121
+ old sincedb records converted to the new format, this is blank).
71
122
 
72
123
  On non-Windows systems you can obtain the inode number of a file
73
124
  with e.g. `ls -li`.
74
125
 
75
- ==== File rotation
126
+ ==== File rotation in Tail mode
76
127
 
77
128
  File rotation is detected and handled by this input, regardless of
78
129
  whether the file is rotated via a rename or a copy operation. To
@@ -102,9 +153,17 @@ This plugin supports the following configuration options plus the <<plugins-{typ
102
153
  | <<plugins-{type}s-{plugin}-delimiter>> |<<string,string>>|No
103
154
  | <<plugins-{type}s-{plugin}-discover_interval>> |<<number,number>>|No
104
155
  | <<plugins-{type}s-{plugin}-exclude>> |<<array,array>>|No
156
+ | <<plugins-{type}s-{plugin}-file_chunk_count>> |<<number,number>>|No
157
+ | <<plugins-{type}s-{plugin}-file_chunk_size>> |<<number,number>>|No
158
+ | <<plugins-{type}s-{plugin}-file_completed_action>> |<<string,string>>, one of `["delete", "log", "log_and_delete"]`|No
159
+ | <<plugins-{type}s-{plugin}-file_completed_log_path>> |<<string,string>>|No
160
+ | <<plugins-{type}s-{plugin}-file_sort_by>> |<<string,string>>, one of `["last_modified", "path"]`|No
161
+ | <<plugins-{type}s-{plugin}-file_sort_direction>> |<<string,string>>, one of `["asc", "desc"]`|No
105
162
  | <<plugins-{type}s-{plugin}-ignore_older>> |<<number,number>>|No
106
163
  | <<plugins-{type}s-{plugin}-max_open_files>> |<<number,number>>|No
164
+ | <<plugins-{type}s-{plugin}-mode>> |<<string,string>>, one of `["tail", "read"]`|No
107
165
  | <<plugins-{type}s-{plugin}-path>> |<<array,array>>|Yes
166
+ | <<plugins-{type}s-{plugin}-sincedb_clean_after>> |<<number,number>>|No
108
167
  | <<plugins-{type}s-{plugin}-sincedb_path>> |<<string,string>>|No
109
168
  | <<plugins-{type}s-{plugin}-sincedb_write_interval>> |<<number,number>>|No
110
169
  | <<plugins-{type}s-{plugin}-start_position>> |<<string,string>>, one of `["beginning", "end"]`|No
@@ -117,7 +176,7 @@ input plugins.
117
176
  &nbsp;
118
177
 
119
178
  [id="plugins-{type}s-{plugin}-close_older"]
120
- ===== `close_older`
179
+ ===== `close_older`
121
180
 
122
181
  * Value type is <<number,number>>
123
182
  * Default value is `3600`
@@ -129,18 +188,20 @@ read. If tailing, and there is a large time gap in incoming data the file
129
188
  can be closed (allowing other files to be opened) but will be queued for
130
189
  reopening when new data is detected. If reading, the file will be closed
131
190
  after `close_older` seconds from when the last bytes were read.
132
- The default is 1 hour
191
+ This setting is retained for backward compatibility if you upgrade the
192
+ plugin to 5.0.0+, are reading not tailing and do not switch to using Read mode.
133
193
 
134
194
  [id="plugins-{type}s-{plugin}-delimiter"]
135
- ===== `delimiter`
195
+ ===== `delimiter`
136
196
 
137
197
  * Value type is <<string,string>>
138
198
  * Default value is `"\n"`
139
199
 
140
- set the new line delimiter, defaults to "\n"
200
+ Set the new line delimiter; defaults to "\n". Note that when reading compressed files
201
+ this setting is not used, instead the standard Windows or Unix line endings are used.
141
202
 
142
203
  [id="plugins-{type}s-{plugin}-discover_interval"]
143
- ===== `discover_interval`
204
+ ===== `discover_interval`
144
205
 
145
206
  * Value type is <<number,number>>
146
207
  * Default value is `15`
@@ -149,7 +210,7 @@ How often (in seconds) we expand the filename patterns in the
149
210
  `path` option to discover new files to watch.
150
211
 
151
212
  [id="plugins-{type}s-{plugin}-exclude"]
152
- ===== `exclude`
213
+ ===== `exclude`
153
214
 
154
215
  * Value type is <<array,array>>
155
216
  * There is no default value for this setting.
@@ -159,12 +220,79 @@ patterns are valid here, too. For example, if you have
159
220
  [source,ruby]
160
221
  path => "/var/log/*"
161
222
 
162
- You might want to exclude gzipped files:
223
+ In Tail mode, you might want to exclude gzipped files:
163
224
  [source,ruby]
164
225
  exclude => "*.gz"
165
226
 
227
+ [id="plugins-{type}s-{plugin}-file_chunk_count"]
228
+ ===== `file_chunk_count`
229
+
230
+ * Value type is <<number,number>>
231
+ * Default value is `4611686018427387903`
232
+
233
+ When combined with the `file_chunk_size`, this option sets how many chunks (bands or stripes)
234
+ are read from each file before moving to the next active file.
235
+ For example, a `file_chunk_count` of 32 and a `file_chunk_size` of 32KB will process the next 1MB from each active file.
236
+ As the default is very large, the file is effectively read to EOF before moving to the next active file.
237
+
238
+ [id="plugins-{type}s-{plugin}-file_chunk_size"]
239
+ ===== `file_chunk_size`
240
+
241
+ * Value type is <<number,number>>
242
+ * Default value is `32768` (32KB)
243
+
244
+ File content is read off disk in blocks or chunks and lines are extracted from the chunk.
245
+ See <<plugins-{type}s-{plugin}-file_chunk_count>> to see why and when to change this setting
246
+ from the default.
247
+
248
+ [id="plugins-{type}s-{plugin}-file_completed_action"]
249
+ ===== `file_completed_action`
250
+
251
+ * Value can be any of: `delete`, `log`, `log_and_delete`
252
+ * The default is `delete`.
253
+
254
+ When in `read` mode, what action should be carried out when a file is done with.
255
+ If 'delete' is specified then the file will be deleted. If 'log' is specified
256
+ then the full path of the file is logged to the file specified in the
257
+ `file_completed_log_path` setting. If `log_and_delete` is specified then
258
+ both above actions take place.
259
+
260
+ [id="plugins-{type}s-{plugin}-file_completed_log_path"]
261
+ ===== `file_completed_log_path`
262
+
263
+ * Value type is <<string,string>>
264
+ * There is no default value for this setting.
265
+
266
+ The file to which the paths of completely read files are appended. Only specify
267
+ this path to a file when `file_completed_action` is 'log' or 'log_and_delete'.
268
+ IMPORTANT: this file is appended to only - it could become very large. You are
269
+ responsible for file rotation.
270
+
271
+ [id="plugins-{type}s-{plugin}-file_sort_by"]
272
+ ===== `file_sort_by`
273
+
274
+ * Value can be any of: `last_modified`, `path`
275
+ * The default is `last_modified`.
276
+
277
+ Which attribute of a "watched" file should be used to sort them by.
278
+ Files can be sorted by last modified date or alphabetically by full path.
279
+ Previously the processing order of the discovered and therefore
280
+ "watched" files was OS dependent.
281
+
282
+ [id="plugins-{type}s-{plugin}-file_sort_direction"]
283
+ ===== `file_sort_direction`
284
+
285
+ * Value can be any of: `asc`, `desc`
286
+ * The default is `asc`.
287
+
288
+ Select between ascending and descending order when sorting "watched" files.
289
+ If oldest data first is important then the defaults of `last_modified` + `asc` are good.
290
+ If newest data first is more important then opt for `last_modified` + `desc`.
291
+ If you use special naming conventions for the file full paths then perhaps
292
+ `path` + `asc` will help to control the order of file processing.
293
+
166
294
  [id="plugins-{type}s-{plugin}-ignore_older"]
167
- ===== `ignore_older`
295
+ ===== `ignore_older`
168
296
 
169
297
  * Value type is <<number,number>>
170
298
  * There is no default value for this setting.
@@ -176,7 +304,7 @@ longer ignored and any new data is read. By default, this option is
176
304
  disabled. Note this unit is in seconds.
177
305
 
178
306
  [id="plugins-{type}s-{plugin}-max_open_files"]
179
- ===== `max_open_files`
307
+ ===== `max_open_files`
180
308
 
181
309
  * Value type is <<number,number>>
182
310
  * There is no default value for this setting.
@@ -186,10 +314,29 @@ at any one time. Use close_older to close some files if you need to
186
314
  process more files than this number. This should not be set to the
187
315
  maximum the OS can do because file handles are needed for other
188
316
  LS plugins and OS processes.
189
- The default of 4095 is set in filewatch.
317
+ A default of 4095 is set internally.
318
+
319
+ [id="plugins-{type}s-{plugin}-mode"]
320
+ ===== `mode`
321
+
322
+ * Value can be either `tail` or `read`.
323
+ * The default value is `tail`.
324
+
325
+ What mode do you want the file input to operate in. Tail a few files or
326
+ read many content-complete files. Read mode now supports gzip file processing.
327
+ If "read" is specified then the following other settings are ignored:
328
+
329
+ . `start_position` (files are always read from the beginning)
330
+ . `close_older` (files are automatically 'closed' when EOF is reached)
331
+
332
+ If "read" is specified then the following settings are heeded:
333
+
334
+ . `ignore_older` (older files are not processed)
335
+ . `file_completed_action` (what action should be taken when the file is processed)
336
+ . `file_completed_log_path` (which file should the completed file path be logged to)
190
337
 
191
338
  [id="plugins-{type}s-{plugin}-path"]
192
- ===== `path`
339
+ ===== `path`
193
340
 
194
341
  * This is a required setting.
195
342
  * Value type is <<array,array>>
@@ -204,9 +351,21 @@ Paths must be absolute and cannot be relative.
204
351
  You may also configure multiple paths. See an example
205
352
  on the {logstash-ref}/configuration-file-structure.html#array[Logstash configuration page].
206
353
 
354
+ [id="plugins-{type}s-{plugin}-sincedb_clean_after"]
355
+ ===== `sincedb_clean_after`
356
+
357
+ * Value type is <<number,number>>
358
+ * The default value for this setting is 14.
359
+ * This unit is in *days* and can be decimal e.g. 0.5 is 12 hours.
360
+
361
+ The sincedb record now has a last active timestamp associated with it.
362
+ If no changes are detected in a tracked file in the last N days its sincedb
363
+ tracking record expires and will not be persisted.
364
+ This option helps protect against the inode recycling problem.
365
+ Filebeat has a {filebeat-ref}/faq.html#inode-reuse-issue[FAQ about inode recycling].
207
366
 
208
367
  [id="plugins-{type}s-{plugin}-sincedb_path"]
209
- ===== `sincedb_path`
368
+ ===== `sincedb_path`
210
369
 
211
370
  * Value type is <<string,string>>
212
371
  * There is no default value for this setting.
@@ -217,7 +376,7 @@ The default will write sincedb files to `<path.data>/plugins/inputs/file`
217
376
  NOTE: it must be a file path and not a directory path
218
377
 
219
378
  [id="plugins-{type}s-{plugin}-sincedb_write_interval"]
220
- ===== `sincedb_write_interval`
379
+ ===== `sincedb_write_interval`
221
380
 
222
381
  * Value type is <<number,number>>
223
382
  * Default value is `15`
@@ -226,7 +385,7 @@ How often (in seconds) to write a since database with the current position of
226
385
  monitored log files.
227
386
 
228
387
  [id="plugins-{type}s-{plugin}-start_position"]
229
- ===== `start_position`
388
+ ===== `start_position`
230
389
 
231
390
  * Value can be any of: `beginning`, `end`
232
391
  * Default value is `"end"`
@@ -243,7 +402,7 @@ has already been seen before, this option has no effect and the
243
402
  position recorded in the sincedb file will be used.
244
403
 
245
404
  [id="plugins-{type}s-{plugin}-stat_interval"]
246
- ===== `stat_interval`
405
+ ===== `stat_interval`
247
406
 
248
407
  * Value type is <<number,number>>
249
408
  * Default value is `1`
@@ -252,9 +411,8 @@ How often (in seconds) we stat files to see if they have been modified.
252
411
  Increasing this interval will decrease the number of system calls we make,
253
412
  but increase the time to detect new log lines.
254
413
 
255
-
256
-
257
414
  [id="plugins-{type}s-{plugin}-common-options"]
258
415
  include::{include_path}/{type}.asciidoc[]
259
416
 
260
- :default_codec!:
417
+ :default_codec!:
418
+
@@ -0,0 +1,74 @@
1
+ # encoding: utf-8
2
+ require "rbconfig"
3
+ require "pathname"
4
+ # require "logstash/environment"
5
+
6
+ ## Common setup
7
+ # all the required constants and files
8
+ # defined in one place
9
+ module FileWatch
10
+ # the number of bytes read from a file during the read phase
11
+ FILE_READ_SIZE = 32768
12
+ # the largest fixnum in ruby
13
+ # this is used in the read loop e.g.
14
+ # @opts[:file_chunk_count].times do
15
+ # where file_chunk_count defaults to this constant
16
+ FIXNUM_MAX = (2**(0.size * 8 - 2) - 1)
17
+
18
+ require_relative "helper"
19
+
20
+ module WindowsInode
21
+ def prepare_inode(path, stat)
22
+ fileId = Winhelper.GetWindowsUniqueFileIdentifier(path)
23
+ [fileId, 0, 0] # dev_* doesn't make sense on Windows
24
+ end
25
+ end
26
+
27
+ module UnixInode
28
+ def prepare_inode(path, stat)
29
+ [stat.ino.to_s, stat.dev_major, stat.dev_minor]
30
+ end
31
+ end
32
+
33
+ jar_version = IO.read("JAR_VERSION").strip
34
+
35
+ require "java"
36
+ require_relative "../../lib/jars/filewatch-#{jar_version}.jar"
37
+ require "jruby_file_watch"
38
+
39
+ if LogStash::Environment.windows?
40
+ require "winhelper"
41
+ FileOpener = FileExt
42
+ InodeMixin = WindowsInode
43
+ else
44
+ FileOpener = ::File
45
+ InodeMixin = UnixInode
46
+ end
47
+
48
+ # Structs can be used as hash keys because they compare by value
49
+ # this is used as the key for values in the sincedb hash
50
+ InodeStruct = Struct.new(:inode, :maj, :min) do
51
+ def to_s
52
+ to_a.join(" ")
53
+ end
54
+ end
55
+
56
+ class NoSinceDBPathGiven < StandardError; end
57
+
58
+ # how often (in seconds) we logger.warn a failed file open, per path.
59
+ OPEN_WARN_INTERVAL = ENV.fetch("FILEWATCH_OPEN_WARN_INTERVAL", 300).to_i
60
+ MAX_FILES_WARN_INTERVAL = ENV.fetch("FILEWATCH_MAX_FILES_WARN_INTERVAL", 20).to_i
61
+
62
+ require "logstash/util/buftok"
63
+ require_relative "settings"
64
+ require_relative "sincedb_value"
65
+ require_relative "sincedb_record_serializer"
66
+ require_relative "watched_files_collection"
67
+ require_relative "sincedb_collection"
68
+ require_relative "watch"
69
+ require_relative "watched_file"
70
+ require_relative "discoverer"
71
+ require_relative "observing_base"
72
+ require_relative "observing_tail"
73
+ require_relative "observing_read"
74
+ end