logstash-input-file 4.0.5 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -3
- data/JAR_VERSION +1 -0
- data/docs/index.asciidoc +195 -37
- data/lib/filewatch/bootstrap.rb +74 -0
- data/lib/filewatch/discoverer.rb +94 -0
- data/lib/filewatch/helper.rb +65 -0
- data/lib/filewatch/observing_base.rb +97 -0
- data/lib/filewatch/observing_read.rb +23 -0
- data/lib/filewatch/observing_tail.rb +22 -0
- data/lib/filewatch/read_mode/handlers/base.rb +81 -0
- data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
- data/lib/filewatch/read_mode/processor.rb +117 -0
- data/lib/filewatch/settings.rb +67 -0
- data/lib/filewatch/sincedb_collection.rb +215 -0
- data/lib/filewatch/sincedb_record_serializer.rb +70 -0
- data/lib/filewatch/sincedb_value.rb +87 -0
- data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
- data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
- data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
- data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
- data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
- data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
- data/lib/filewatch/tail_mode/processor.rb +209 -0
- data/lib/filewatch/watch.rb +107 -0
- data/lib/filewatch/watched_file.rb +226 -0
- data/lib/filewatch/watched_files_collection.rb +84 -0
- data/lib/filewatch/winhelper.rb +65 -0
- data/lib/jars/filewatch-1.0.0.jar +0 -0
- data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
- data/lib/logstash/inputs/file.rb +162 -107
- data/lib/logstash/inputs/file_listener.rb +61 -0
- data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
- data/logstash-input-file.gemspec +5 -4
- data/spec/filewatch/buftok_spec.rb +24 -0
- data/spec/filewatch/reading_spec.rb +128 -0
- data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
- data/spec/filewatch/spec_helper.rb +120 -0
- data/spec/filewatch/tailing_spec.rb +440 -0
- data/spec/filewatch/watched_file_spec.rb +38 -0
- data/spec/filewatch/watched_files_collection_spec.rb +73 -0
- data/spec/filewatch/winhelper_spec.rb +22 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/no-final-newline.log +2 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
- data/spec/inputs/file_read_spec.rb +155 -0
- data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
- metadata +96 -28
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/util/loggable"
|
3
|
+
|
4
|
+
module FileWatch
  # Runs the watch loop: on every pass it asks the processor to act on each
  # watched file according to its state and, every `discover_interval` passes,
  # asks the discoverer to look for files again.
  class Watch
    include LogStash::Util::Loggable

    attr_accessor :lastwarn_max_files
    attr_reader :discoverer, :watched_files_collection

    # discoverer               - receives add_path and discover calls
    # watched_files_collection - receives empty?, values, delete and close_all calls
    # settings                 - read for discover_interval and stat_interval
    def initialize(discoverer, watched_files_collection, settings)
      @settings = settings
      # watch and iterate_on_state can be called from different threads.
      @lock = Mutex.new
      # we need to be threadsafe about the quit mutation
      @quit = false
      @quit_lock = Mutex.new
      @lastwarn_max_files = 0
      @discoverer = discoverer
      @watched_files_collection = watched_files_collection
    end

    # Stores the processor and hands it a reference to this watch.
    # Returns self so that calls can be chained.
    def add_processor(processor)
      @processor = processor
      @processor.add_watch(self)
      self
    end

    # Registers a path with the discoverer while holding the lock.
    # Always returns true - never whatever @discoverer.add_path returns.
    def watch(path)
      synchronized do
        @discoverer.add_path(path)
      end
      true
    end

    # Runs one discovery pass while holding the lock.
    # Always returns true - never whatever @discoverer.discover returns.
    def discover
      synchronized do
        @discoverer.discover
      end
      true
    end

    # Initializes the processor handlers, then loops until quit is requested:
    # process the watched files, periodically discover, sleep for stat_interval.
    # All watched files are closed when the loop finishes.
    def subscribe(observer, sincedb_collection)
      @processor.initialize_handlers(sincedb_collection, observer)

      glob = 0
      interval = @settings.discover_interval
      reset_quit
      until quit?
        iterate_on_state
        break if quit?
        glob += 1
        # use >= and not ==, the integer counter can never equal a fractional
        # or zero interval and discovery would then never run again
        if glob >= interval
          discover
          glob = 0
        end
        break if quit?
        sleep(@settings.stat_interval)
      end
      @watched_files_collection.close_all
    end # def subscribe

    # Read mode processor will handle watched_files in the closed, ignored, watched and active state
    # differently from Tail mode - see the ReadMode::Processor and TailMode::Processor
    def iterate_on_state
      return if @watched_files_collection.empty?
      synchronized do
        begin
          # fetch the watched_file values once, every stage below gets the same object
          watched_files = @watched_files_collection.values
          @processor.process_closed(watched_files)
          return if quit?
          @processor.process_ignored(watched_files)
          return if quit?
          @processor.process_watched(watched_files)
          return if quit?
          @processor.process_active(watched_files)
        ensure
          # runs on the early returns too, so paths flagged by the processor are always purged
          @watched_files_collection.delete(@processor.deletable_filepaths)
          @processor.deletable_filepaths.clear
        end
      end
    end # def iterate_on_state

    # Requests that the subscribe loop stops at its next quit? check.
    def quit
      @quit_lock.synchronize do
        @quit = true
      end
    end # def quit

    def quit?
      @quit_lock.synchronize { @quit }
    end

    private

    # Runs the given block while holding @lock.
    def synchronized(&block)
      @lock.synchronize(&block)
    end

    def reset_quit
      @quit_lock.synchronize { @quit = false }
    end
  end
end
|
@@ -0,0 +1,226 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FileWatch
  # This class represents a file that has been discovered. It holds the latest
  # stat data, the number of bytes read so far, the open file handle, the
  # line buffer and the current and recent states of the file.
  class WatchedFile
    include InodeMixin # see bootstrap.rb at `if LogStash::Environment.windows?`

    attr_reader :bytes_read, :state, :file, :buffer, :recent_states
    attr_reader :path, :filestat, :accessed_at, :modified_at, :pathname
    attr_reader :sdb_key_v1, :last_stat_size, :listener
    attr_accessor :last_open_warning_at

    # pathname - a String or a Pathname object
    # stat     - stat data for the file, read for mtime and size
    # settings - read for delimiter, close_older and ignore_older
    def initialize(pathname, stat, settings)
      @settings = settings
      @pathname = Pathname.new(pathname) # given arg pathname might be a string or a Pathname object
      @path = @pathname.to_path
      @bytes_read = 0
      @last_stat_size = 0
      # the prepare_inode method is sourced from the mixed module above
      @sdb_key_v1 = InodeStruct.new(*prepare_inode(path, stat))
      # initial as true means we have not associated this watched_file with a previous sincedb value yet.
      # and we should read from the beginning if necessary
      @initial = true
      @recent_states = [] # keep last 8 states, managed in set_state
      @state = :watched
      set_stat(stat) # can change @last_stat_size
      @listener = nil
      @last_open_warning_at = nil
      set_accessed_at
    end

    # Asks the observer for the listener that belongs to this file's path.
    def set_listener(observer)
      @listener = observer.listener_for(@path)
    end

    def unset_listener
      @listener = nil
    end

    def has_listener?
      !@listener.nil?
    end

    def sincedb_key
      @sdb_key_v1
    end

    # Marks this file as no longer being in its initial state.
    def initial_completed
      @initial = false
    end

    # Records the current time, as a float, as the last access time.
    def set_accessed_at
      @accessed_at = Time.now.to_f
    end

    def initial?
      @initial
    end

    # True when the path has a gzip file extension.
    def compressed?
      @path.end_with?('.gz','.gzip')
    end

    def size_changed?
      @last_stat_size != bytes_read
    end

    def all_read?
      @last_stat_size == bytes_read
    end

    def open
      file_add_opened(FileOpener.open(@path))
    end

    # Stores the opened file; the line buffer is created once and then reused.
    def file_add_opened(rubyfile)
      @file = rubyfile
      @buffer = BufferedTokenizer.new(@settings.delimiter) if @buffer.nil?
    end

    # Closes the file and drops the reference, does nothing if there is no open file.
    def file_close
      return if @file.nil? || @file.closed?
      @file.close
      @file = nil
    end

    def file_seek(amount, whence = IO::SEEK_SET)
      @file.sysseek(amount, whence)
    end

    # Reads up to amount bytes with sysread; a read counts as an access.
    def file_read(amount)
      set_accessed_at
      @file.sysread(amount)
    end

    def file_open?
      !@file.nil? && !@file.closed?
    end

    def reset_buffer
      @buffer.flush
    end

    def buffer_extract(data)
      @buffer.extract(data)
    end

    # Adds delta to the bytes read, a nil delta is ignored.
    def increment_bytes_read(delta)
      return if delta.nil?
      @bytes_read += delta
    end

    # Replaces the bytes read, a nil total is ignored.
    def update_bytes_read(total_bytes_read)
      return if total_bytes_read.nil?
      @bytes_read = total_bytes_read
    end

    # Points this watched file at a new path.
    # @pathname is rebuilt as well, restat uses it and must not stat the old location.
    def update_path(_path)
      @pathname = Pathname.new(_path)
      @path = _path
    end

    def update_stat(st)
      set_stat(st)
    end

    def activate
      set_state :active
    end

    # Moves to the ignored state and treats the whole file, as last stat'ed, as read.
    def ignore
      set_state :ignored
      @bytes_read = @filestat.size
    end

    def close
      set_state :closed
    end

    def watch
      set_state :watched
    end

    def unwatch
      set_state :unwatched
    end

    def active?
      @state == :active
    end

    def ignored?
      @state == :ignored
    end

    def closed?
      @state == :closed
    end

    def watched?
      @state == :watched
    end

    def unwatched?
      @state == :unwatched
    end

    def expiry_close_enabled?
      !@settings.close_older.nil?
    end

    def expiry_ignore_enabled?
      !@settings.ignore_older.nil?
    end

    # True when the last stat size is less than the bytes read.
    def shrunk?
      @last_stat_size < @bytes_read
    end

    # True when the last stat size is greater than the bytes read.
    def grown?
      @last_stat_size > @bytes_read
    end

    # Stats the file at the current pathname again and stores the result.
    def restat
      set_stat(pathname.stat)
    end

    # Pushes the current state onto the recent states, at most 8 are kept,
    # then switches to the new state.
    def set_state(value)
      @recent_states.shift if @recent_states.size == 8
      @recent_states << @state
      @state = value
    end

    # The recent states followed by the current state.
    def recent_state_history
      @recent_states + Array(@state)
    end

    def file_closable?
      file_can_close? && all_read?
    end

    def file_ignorable?
      return false unless expiry_ignore_enabled?
      # (Time.now - stat.mtime) <- in jruby, this does int and float
      # conversions before the subtraction and returns a float.
      # so use all floats upfront
      (Time.now.to_f - @modified_at) > @settings.ignore_older
    end

    def file_can_close?
      return false unless expiry_close_enabled?
      (Time.now.to_f - @accessed_at) > @settings.close_older
    end

    def to_s
      inspect
    end

    private

    # Caches the mtime (as a float) and the size from the given stat and keeps the stat itself.
    def set_stat(stat)
      @modified_at = stat.mtime.to_f
      @last_stat_size = stat.size
      @filestat = stat
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module FileWatch
  # An ordered collection of watched files.
  # @files holds the watched files in the configured sort order and @pointers
  # maps each watched file path to its index in @files.
  class WatchedFilesCollection

    # settings - read for file_sort_by and file_sort_direction, the two values
    #            are joined to pick one of the private sort methods below
    def initialize(settings)
      @sort_by = settings.file_sort_by # "last_modified" | "path"
      @sort_direction = settings.file_sort_direction # "asc" | "desc"
      @sort_method = method("#{@sort_by}_#{@sort_direction}".to_sym)
      @files = []
      @pointers = {}
    end

    # Appends the watched file, then re-sorts and re-indexes the collection.
    def add(watched_file)
      @files << watched_file
      @sort_method.call
    end

    # Removes the watched files for the given path or array of paths.
    # Paths that are not in the collection are skipped.
    def delete(paths)
      # collect every index before removing anything, each delete_at shifts
      # the position of the elements that follow it
      indexes = Array(paths).map { |path| @pointers.delete(path) }.compact.uniq
      # remove from the highest index down so the remaining indexes stay valid
      indexes.sort.reverse_each { |index| @files.delete_at(index) }
      @sort_method.call
    end

    def close_all
      @files.each(&:file_close)
    end

    def empty?
      @files.empty?
    end

    # The paths of the watched files.
    def keys
      @pointers.keys
    end

    # The watched files in sort order.
    def values
      @files
    end

    # Returns the watched file for the path or nil when the path is unknown.
    def watched_file_by_path(path)
      index = @pointers[path]
      return nil unless index
      @files[index]
    end

    private

    def last_modified_asc
      @files.sort! do |left, right|
        left.modified_at <=> right.modified_at
      end
      refresh_pointers
    end

    def last_modified_desc
      @files.sort! do |left, right|
        right.modified_at <=> left.modified_at
      end
      refresh_pointers
    end

    def path_asc
      @files.sort! do |left, right|
        left.path <=> right.path
      end
      refresh_pointers
    end

    def path_desc
      @files.sort! do |left, right|
        right.path <=> left.path
      end
      refresh_pointers
    end

    # Rebuilds the path to index map from scratch so that no entry can
    # refer to a file that is gone or has moved to another position.
    def refresh_pointers
      @pointers.clear
      @files.each_with_index do |watched_file, index|
        @pointers[watched_file.path] = index
      end
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "ffi"
|
3
|
+
|
4
|
+
# FFI bindings to kernel32 that are used to build an identifier for a file
# from its volume serial number and file index.
module Winhelper
  extend FFI::Library

  ffi_lib 'kernel32'
  ffi_convention :stdcall

  class FileTime < FFI::Struct
    layout :lowDateTime, :uint,
           :highDateTime, :uint
  end

  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa363788(v=vs.85).aspx
  class FileInformation < FFI::Struct
    layout :fileAttributes, :uint,     #DWORD dwFileAttributes;
           :createTime, FileTime,      #FILETIME ftCreationTime;
           :lastAccessTime, FileTime,  #FILETIME ftLastAccessTime;
           :lastWriteTime, FileTime,   #FILETIME ftLastWriteTime;
           :volumeSerialNumber, :uint, #DWORD dwVolumeSerialNumber;
           :fileSizeHigh, :uint,       #DWORD nFileSizeHigh;
           :fileSizeLow, :uint,        #DWORD nFileSizeLow;
           :numberOfLinks, :uint,      #DWORD nNumberOfLinks;
           :fileIndexHigh, :uint,      #DWORD nFileIndexHigh;
           :fileIndexLow, :uint        #DWORD nFileIndexLow;
  end

  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
  #HANDLE WINAPI CreateFile(_In_ LPCTSTR lpFileName,_In_ DWORD dwDesiredAccess,_In_ DWORD dwShareMode,
  # _In_opt_ LPSECURITY_ATTRIBUTES lpSecurityAttributes,_In_ DWORD dwCreationDisposition,
  # _In_ DWORD dwFlagsAndAttributes,_In_opt_ HANDLE hTemplateFile);
  attach_function :GetOpenFileHandle, :CreateFileA, [:pointer, :uint, :uint, :pointer, :uint, :uint, :pointer], :pointer

  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa364952(v=vs.85).aspx
  #BOOL WINAPI GetFileInformationByHandle(_In_ HANDLE hFile,_Out_ LPBY_HANDLE_FILE_INFORMATION lpFileInformation);
  attach_function :GetFileInformationByHandle, [:pointer, :pointer], :int

  attach_function :CloseHandle, [:pointer], :int

  # Returns "<volume serial>-<file index low>-<file index high>" for the file
  # at path, or the path itself when the file information cannot be retrieved.
  def self.GetWindowsUniqueFileIdentifier(path)
    # arguments follow the CreateFile signature quoted above: desired access 0,
    # share mode 7 (read | write | delete), creation disposition 3 (OPEN_EXISTING)
    # and flags 128 (FILE_ATTRIBUTE_NORMAL)
    handle = GetOpenFileHandle(path, 0, 7, nil, 3, 128, nil)
    fileInfo = Winhelper::FileInformation.new
    success = GetFileInformationByHandle(handle, fileInfo)
    CloseHandle(handle)
    # a BOOL result is nonzero on success, it is not guaranteed to be exactly 1
    return path if success == 0
    #this is only guaranteed on NTFS, for ReFS on windows 2012, GetFileInformationByHandleEx should be used with FILE_ID_INFO, which returns a 128 bit identifier
    "#{fileInfo[:volumeSerialNumber]}-#{fileInfo[:fileIndexLow]}-#{fileInfo[:fileIndexHigh]}"
  end
end
|
61
|
+
|
62
|
+
#fileId = Winhelper.GetWindowsUniqueFileIdentifier('C:\inetpub\logs\LogFiles\W3SVC1\u_ex1fdsadfsadfasdf30612.log')
|
63
|
+
#p "FileId: " + fileId
|
64
|
+
#p "outside function, sleeping"
|
65
|
+
#sleep(10)
|