logstash-input-file 4.0.5 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -3
- data/JAR_VERSION +1 -0
- data/docs/index.asciidoc +195 -37
- data/lib/filewatch/bootstrap.rb +74 -0
- data/lib/filewatch/discoverer.rb +94 -0
- data/lib/filewatch/helper.rb +65 -0
- data/lib/filewatch/observing_base.rb +97 -0
- data/lib/filewatch/observing_read.rb +23 -0
- data/lib/filewatch/observing_tail.rb +22 -0
- data/lib/filewatch/read_mode/handlers/base.rb +81 -0
- data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
- data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
- data/lib/filewatch/read_mode/processor.rb +117 -0
- data/lib/filewatch/settings.rb +67 -0
- data/lib/filewatch/sincedb_collection.rb +215 -0
- data/lib/filewatch/sincedb_record_serializer.rb +70 -0
- data/lib/filewatch/sincedb_value.rb +87 -0
- data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
- data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
- data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
- data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
- data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
- data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
- data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
- data/lib/filewatch/tail_mode/processor.rb +209 -0
- data/lib/filewatch/watch.rb +107 -0
- data/lib/filewatch/watched_file.rb +226 -0
- data/lib/filewatch/watched_files_collection.rb +84 -0
- data/lib/filewatch/winhelper.rb +65 -0
- data/lib/jars/filewatch-1.0.0.jar +0 -0
- data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
- data/lib/logstash/inputs/file.rb +162 -107
- data/lib/logstash/inputs/file_listener.rb +61 -0
- data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
- data/logstash-input-file.gemspec +5 -4
- data/spec/filewatch/buftok_spec.rb +24 -0
- data/spec/filewatch/reading_spec.rb +128 -0
- data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
- data/spec/filewatch/spec_helper.rb +120 -0
- data/spec/filewatch/tailing_spec.rb +440 -0
- data/spec/filewatch/watched_file_spec.rb +38 -0
- data/spec/filewatch/watched_files_collection_spec.rb +73 -0
- data/spec/filewatch/winhelper_spec.rb +22 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/no-final-newline.log +2 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
- data/spec/inputs/file_read_spec.rb +155 -0
- data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
- metadata +96 -28
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/util/loggable"
|
3
|
+
|
4
|
+
module FileWatch
|
5
|
+
# Drives the watch loop: periodically asks the discoverer for new files and
# hands the current set of watched files to the processor, stage by stage.
class Watch
  include LogStash::Util::Loggable

  attr_reader :discoverer, :watched_files_collection
  attr_accessor :lastwarn_max_files

  # discoverer               - object responding to add_path and discover
  # watched_files_collection - collection of the files currently known
  # settings                 - provides discover_interval and stat_interval
  def initialize(discoverer, watched_files_collection, settings)
    @discoverer = discoverer
    @watched_files_collection = watched_files_collection
    @settings = settings
    @lastwarn_max_files = 0
    # watch and iterate_on_state can be called from different threads.
    @lock = Mutex.new
    # the quit flag is mutated from other threads, so it gets its own lock
    @quit_lock = Mutex.new
    @quit = false
  end

  # Attaches the processor and registers this watch with it.
  # Returns self so that calls can be chained.
  def add_processor(processor)
    @processor = processor
    processor.add_watch(self)
    self
  end

  # Registers a path with the discoverer while holding the lock.
  # Always returns true, never whatever @discoverer.add_path returns.
  def watch(path)
    synchronized { @discoverer.add_path(path) }
    true
  end

  # Runs one discovery pass while holding the lock.
  # Always returns true, never whatever @discoverer.discover returns.
  def discover
    synchronized { @discoverer.discover }
    true
  end

  # Main loop. Initializes the processor handlers, then alternates between
  # processing the known files and sleeping for stat_interval seconds until
  # quit is requested. A discovery pass runs every discover_interval
  # iterations. All watched files are closed on the way out.
  def subscribe(observer, sincedb_collection)
    @processor.initialize_handlers(sincedb_collection, observer)

    discover_every = @settings.discover_interval
    passes = 0
    reset_quit
    until quit?
      iterate_on_state
      break if quit?
      passes += 1
      if passes == discover_every
        discover
        passes = 0
      end
      break if quit?
      sleep(@settings.stat_interval)
    end
    @watched_files_collection.close_all
  end # def subscribe

  # Read mode processor will handle watched_files in the closed, ignored, watched and active state
  # differently from Tail mode - see the ReadMode::Processor and TailMode::Processor
  def iterate_on_state
    return if @watched_files_collection.empty?
    synchronized do
      begin
        # the values snapshot is taken once and shared by every stage
        run_processor_stages(@watched_files_collection.values)
      ensure
        # runs even when a stage bails out early or raises
        @watched_files_collection.delete(@processor.deletable_filepaths)
        @processor.deletable_filepaths.clear
      end
    end
  end # def iterate_on_state

  # Asks the subscribe loop to stop at its next quit check.
  def quit
    @quit_lock.synchronize { @quit = true }
  end # def quit

  # True once quit has been requested.
  def quit?
    @quit_lock.synchronize { @quit }
  end

  private

  # Runs the given block while holding the watch lock.
  def synchronized
    @lock.synchronize { yield }
  end

  # Clears a previous quit request so that subscribe can be re-entered.
  def reset_quit
    @quit_lock.synchronize { @quit = false }
  end

  # Feeds the snapshot through each processor stage in order, stopping
  # between stages as soon as quit has been requested.
  def run_processor_stages(watched_files)
    @processor.process_closed(watched_files)
    return if quit?
    @processor.process_ignored(watched_files)
    return if quit?
    @processor.process_watched(watched_files)
    return if quit?
    @processor.process_active(watched_files)
  end
end
|
107
|
+
end
|
@@ -0,0 +1,226 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FileWatch
|
4
|
+
# Represents a file that has been discovered: its path, the most recent stat
# data, how many bytes have been read so far and its processing state.
class WatchedFile
  include InodeMixin # see bootstrap.rb at `if LogStash::Environment.windows?`

  attr_reader :path, :pathname, :file, :buffer, :listener
  attr_reader :state, :recent_states
  attr_reader :bytes_read, :last_stat_size, :filestat
  attr_reader :accessed_at, :modified_at, :sdb_key_v1
  attr_accessor :last_open_warning_at

  # pathname - String or Pathname of the discovered file
  # stat     - stat object for that file (mtime and size are read from it)
  # settings - provides delimiter, close_older and ignore_older
  def initialize(pathname, stat, settings)
    @settings = settings
    @pathname = Pathname.new(pathname)
    @path = @pathname.to_path
    @bytes_read = 0
    @last_stat_size = 0
    # prepare_inode comes from the InodeMixin included above
    @sdb_key_v1 = InodeStruct.new(*prepare_inode(path, stat))
    # true means this watched_file has not yet been associated with a
    # previous sincedb value, so reading may have to start at the beginning
    @initial = true
    @state = :watched
    @recent_states = [] # keeps the last 8 states, managed in set_state
    @listener = nil
    @last_open_warning_at = nil
    set_stat(stat) # replaces the zero @last_stat_size set above
    set_accessed_at
  end

  # --- listener ---------------------------------------------------------

  # Asks the observer for the listener dedicated to this file's path.
  def set_listener(observer)
    @listener = observer.listener_for(@path)
  end

  def unset_listener
    @listener = nil
  end

  def has_listener?
    !@listener.nil?
  end

  # --- identity and bookkeeping -----------------------------------------

  def sincedb_key
    @sdb_key_v1
  end

  def initial?
    @initial
  end

  def initial_completed
    @initial = false
  end

  # Records "now" (float seconds) as the last access time.
  def set_accessed_at
    @accessed_at = Time.now.to_f
  end

  # Decided purely from the file name extension.
  def compressed?
    @path.end_with?('.gz','.gzip')
  end

  def size_changed?
    @last_stat_size != bytes_read
  end

  def all_read?
    @last_stat_size == bytes_read
  end

  # --- underlying file handle -------------------------------------------

  def open
    file_add_opened(FileOpener.open(@path))
  end

  # Stores the opened file and creates the line buffer on first use only.
  def file_add_opened(rubyfile)
    @file = rubyfile
    if @buffer.nil?
      @buffer = BufferedTokenizer.new(@settings.delimiter)
    end
  end

  # Closes the handle and forgets it; does nothing when no handle is open.
  def file_close
    return unless file_open?
    @file.close
    @file = nil
  end

  def file_seek(amount, whence = IO::SEEK_SET)
    @file.sysseek(amount, whence)
  end

  # Reads up to amount bytes with sysread, refreshing the access time first.
  def file_read(amount)
    set_accessed_at
    @file.sysread(amount)
  end

  def file_open?
    !(@file.nil? || @file.closed?)
  end

  # --- buffer -----------------------------------------------------------

  def reset_buffer
    @buffer.flush
  end

  def buffer_extract(data)
    @buffer.extract(data)
  end

  # --- position tracking ------------------------------------------------

  # Adds delta to the bytes read counter; a nil delta is ignored.
  def increment_bytes_read(delta)
    @bytes_read += delta unless delta.nil?
  end

  # Overwrites the bytes read counter; a nil total is ignored.
  def update_bytes_read(total_bytes_read)
    @bytes_read = total_bytes_read unless total_bytes_read.nil?
  end

  # NOTE(review): only @path changes here; @pathname (used by restat) keeps
  # pointing at the original location - confirm that this is intended.
  def update_path(_path)
    @path = _path
  end

  def update_stat(st)
    set_stat(st)
  end

  # Refreshes the stat data from the filesystem.
  def restat
    set_stat(pathname.stat)
  end

  # --- state transitions ------------------------------------------------

  def activate
    set_state :active
  end

  # Marks the file as ignored and treats its current content as already read.
  def ignore
    set_state :ignored
    @bytes_read = @filestat.size
  end

  def close
    set_state :closed
  end

  def watch
    set_state :watched
  end

  def unwatch
    set_state :unwatched
  end

  # Pushes the current state onto the bounded history, then switches state.
  def set_state(value)
    @recent_states.shift if @recent_states.size == 8
    @recent_states << @state
    @state = value
  end

  # Previous states followed by the current one, as a new array.
  def recent_state_history
    @recent_states.dup.concat(Array(@state))
  end

  # --- state predicates -------------------------------------------------

  def active?
    @state == :active
  end

  def ignored?
    @state == :ignored
  end

  def closed?
    @state == :closed
  end

  def watched?
    @state == :watched
  end

  def unwatched?
    @state == :unwatched
  end

  # --- size and expiry checks -------------------------------------------

  def expiry_close_enabled?
    !@settings.close_older.nil?
  end

  def expiry_ignore_enabled?
    !@settings.ignore_older.nil?
  end

  def shrunk?
    @last_stat_size < @bytes_read
  end

  def grown?
    @last_stat_size > @bytes_read
  end

  def file_closable?
    file_can_close? && all_read?
  end

  # True when ignore_older is set and the file was last modified longer ago
  # than that. (Time.now - stat.mtime) does int and float conversions in
  # jruby before subtracting, so floats are used on both sides up front.
  def file_ignorable?
    expiry_ignore_enabled? && (Time.now.to_f - @modified_at) > @settings.ignore_older
  end

  # True when close_older is set and the file was last accessed longer ago
  # than that.
  def file_can_close?
    expiry_close_enabled? && (Time.now.to_f - @accessed_at) > @settings.close_older
  end

  def to_s
    inspect
  end

  private

  # Caches the stat object together with its mtime (as a float) and size.
  def set_stat(stat)
    @modified_at = stat.mtime.to_f
    @last_stat_size = stat.size
    @filestat = stat
  end
end
|
226
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module FileWatch
|
3
|
+
# An ordered collection of watched files.
#
# @files holds the watched files in the configured sort order and @pointers
# maps each file's path to its current index in @files, so that a file can be
# looked up by path without scanning the array.
class WatchedFilesCollection

  # settings must respond to:
  #   file_sort_by        - "last_modified" | "path"
  #   file_sort_direction - "asc" | "desc"
  # The two values select one of the private sort methods below; any other
  # combination makes `method` raise NameError here.
  def initialize(settings)
    @sort_by = settings.file_sort_by # "last_modified" | "path"
    @sort_direction = settings.file_sort_direction # "asc" | "desc"
    @sort_method = method("#{@sort_by}_#{@sort_direction}".to_sym)
    @files = []
    @pointers = {}
  end

  # Appends a watched file, then re-sorts and re-indexes the collection.
  def add(watched_file)
    @files << watched_file
    @sort_method.call
  end

  # Removes the watched files registered under the given path(s).
  #
  # paths may be a single path, an array of paths or nil. Paths that are not
  # in the collection are skipped. The collection is re-sorted afterwards
  # even when nothing was removed.
  def delete(paths)
    Array(paths).each do |path|
      index = @pointers.delete(path)
      next if index.nil? # unknown path, nothing to remove
      @files.delete_at(index)
      # every file after the removed one has moved down by one slot, so the
      # remaining pointers must be rebuilt before the next lookup
      refresh_pointers
    end
    @sort_method.call
  end

  # Closes the underlying file of every watched file.
  def close_all
    @files.each(&:file_close)
  end

  def empty?
    @files.empty?
  end

  # The paths currently indexed.
  def keys
    @pointers.keys
  end

  # The watched files in sort order. This is the internal array, not a copy.
  def values
    @files
  end

  # Returns the watched file registered under path, or nil when unknown.
  def watched_file_by_path(path)
    index = @pointers[path]
    return nil unless index
    @files[index]
  end

  private

  # Oldest modification time first.
  def last_modified_asc
    @files.sort! do |left, right|
      left.modified_at <=> right.modified_at
    end
    refresh_pointers
  end

  # Newest modification time first.
  def last_modified_desc
    @files.sort! do |left, right|
      right.modified_at <=> left.modified_at
    end
    refresh_pointers
  end

  # Paths in ascending order.
  def path_asc
    @files.sort! do |left, right|
      left.path <=> right.path
    end
    refresh_pointers
  end

  # Paths in descending order.
  def path_desc
    @files.sort! do |left, right|
      right.path <=> left.path
    end
    refresh_pointers
  end

  # Re-records the index of every file under its current path.
  def refresh_pointers
    @files.each_with_index do |watched_file, index|
      @pointers[watched_file.path] = index
    end
  end
end
|
84
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "ffi"
|
3
|
+
|
4
|
+
# FFI bindings to kernel32 used to derive an identifier for a file on
# Windows from its volume serial number and file index.
module Winhelper
  extend FFI::Library

  ffi_lib 'kernel32'
  ffi_convention :stdcall

  # FILETIME: a 64 bit timestamp split into two DWORDs.
  class FileTime < FFI::Struct
    layout :lowDateTime, :uint,
           :highDateTime, :uint
  end

  # BY_HANDLE_FILE_INFORMATION
  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa363788(v=vs.85).aspx
  class FileInformation < FFI::Struct
    layout :fileAttributes, :uint,      #DWORD dwFileAttributes;
           :createTime, FileTime,       #FILETIME ftCreationTime;
           :lastAccessTime, FileTime,   #FILETIME ftLastAccessTime;
           :lastWriteTime, FileTime,    #FILETIME ftLastWriteTime;
           :volumeSerialNumber, :uint,  #DWORD dwVolumeSerialNumber;
           :fileSizeHigh, :uint,        #DWORD nFileSizeHigh;
           :fileSizeLow, :uint,         #DWORD nFileSizeLow;
           :numberOfLinks, :uint,       #DWORD nNumberOfLinks;
           :fileIndexHigh, :uint,       #DWORD nFileIndexHigh;
           :fileIndexLow, :uint         #DWORD nFileIndexLow;
  end

  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
  #HANDLE WINAPI CreateFile(_In_ LPCTSTR lpFileName,_In_ DWORD dwDesiredAccess,_In_ DWORD dwShareMode,
  # _In_opt_ LPSECURITY_ATTRIBUTES lpSecurityAttributes,_In_ DWORD dwCreationDisposition,
  # _In_ DWORD dwFlagsAndAttributes,_In_opt_ HANDLE hTemplateFile);
  attach_function :GetOpenFileHandle, :CreateFileA, [:pointer, :uint, :uint, :pointer, :uint, :uint, :pointer], :pointer

  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa364952(v=vs.85).aspx
  #BOOL WINAPI GetFileInformationByHandle(_In_ HANDLE hFile,_Out_ LPBY_HANDLE_FILE_INFORMATION lpFileInformation);
  attach_function :GetFileInformationByHandle, [:pointer, :pointer], :int

  attach_function :CloseHandle, [:pointer], :int

  # Returns "<volume serial>-<file index low>-<file index high>" for the file
  # at path, or path itself when the file information cannot be retrieved.
  #
  # The identifier is only guaranteed on NTFS; for ReFS on windows 2012,
  # GetFileInformationByHandleEx should be used with FILE_ID_INFO, which
  # returns a 128 bit identifier.
  def self.GetWindowsUniqueFileIdentifier(path)
    # CreateFileA arguments:
    #   0   - dwDesiredAccess: no read/write access requested
    #   7   - dwShareMode: FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
    #   3   - dwCreationDisposition: OPEN_EXISTING
    #   128 - dwFlagsAndAttributes: FILE_ATTRIBUTE_NORMAL
    handle = GetOpenFileHandle(path, 0, 7, nil, 3, 128, nil)
    begin
      file_info = Winhelper::FileInformation.new
      success = GetFileInformationByHandle(handle, file_info)
    ensure
      # release the handle even if the query above raises
      CloseHandle(handle)
    end

    # Win32 BOOL: zero means failure, any non-zero value means success
    return path if success.zero?

    "#{file_info[:volumeSerialNumber]}-#{file_info[:fileIndexLow]}-#{file_info[:fileIndexHigh]}"
  end
end
|
61
|
+
|
62
|
+
#fileId = Winhelper.GetWindowsUniqueFileIdentifier('C:\inetpub\logs\LogFiles\W3SVC1\u_ex1fdsadfsadfasdf30612.log')
|
63
|
+
#p "FileId: " + fileId
|
64
|
+
#p "outside function, sleeping"
|
65
|
+
#sleep(10)
|