logstash-input-file 4.0.5 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -3
  3. data/JAR_VERSION +1 -0
  4. data/docs/index.asciidoc +195 -37
  5. data/lib/filewatch/bootstrap.rb +74 -0
  6. data/lib/filewatch/discoverer.rb +94 -0
  7. data/lib/filewatch/helper.rb +65 -0
  8. data/lib/filewatch/observing_base.rb +97 -0
  9. data/lib/filewatch/observing_read.rb +23 -0
  10. data/lib/filewatch/observing_tail.rb +22 -0
  11. data/lib/filewatch/read_mode/handlers/base.rb +81 -0
  12. data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
  13. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
  14. data/lib/filewatch/read_mode/processor.rb +117 -0
  15. data/lib/filewatch/settings.rb +67 -0
  16. data/lib/filewatch/sincedb_collection.rb +215 -0
  17. data/lib/filewatch/sincedb_record_serializer.rb +70 -0
  18. data/lib/filewatch/sincedb_value.rb +87 -0
  19. data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
  20. data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
  21. data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
  22. data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
  23. data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
  24. data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
  25. data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
  26. data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
  27. data/lib/filewatch/tail_mode/processor.rb +209 -0
  28. data/lib/filewatch/watch.rb +107 -0
  29. data/lib/filewatch/watched_file.rb +226 -0
  30. data/lib/filewatch/watched_files_collection.rb +84 -0
  31. data/lib/filewatch/winhelper.rb +65 -0
  32. data/lib/jars/filewatch-1.0.0.jar +0 -0
  33. data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
  34. data/lib/logstash/inputs/file.rb +162 -107
  35. data/lib/logstash/inputs/file_listener.rb +61 -0
  36. data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
  37. data/logstash-input-file.gemspec +5 -4
  38. data/spec/filewatch/buftok_spec.rb +24 -0
  39. data/spec/filewatch/reading_spec.rb +128 -0
  40. data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
  41. data/spec/filewatch/spec_helper.rb +120 -0
  42. data/spec/filewatch/tailing_spec.rb +440 -0
  43. data/spec/filewatch/watched_file_spec.rb +38 -0
  44. data/spec/filewatch/watched_files_collection_spec.rb +73 -0
  45. data/spec/filewatch/winhelper_spec.rb +22 -0
  46. data/spec/fixtures/compressed.log.gz +0 -0
  47. data/spec/fixtures/compressed.log.gzip +0 -0
  48. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  49. data/spec/fixtures/no-final-newline.log +2 -0
  50. data/spec/fixtures/uncompressed.log +2 -0
  51. data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
  52. data/spec/inputs/file_read_spec.rb +155 -0
  53. data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
  54. metadata +96 -28
@@ -0,0 +1,107 @@
1
+ # encoding: utf-8
2
+ require "logstash/util/loggable"
3
+
4
module FileWatch
  # Runs the periodic discover/process loop over a collection of watched files.
  #
  # A Watch is given a discoverer, a watched-files collection and a settings
  # object at construction and a processor via #add_processor. #subscribe then
  # loops until #quit is called, handing the current watched files to the
  # processor on every pass and re-running discovery every
  # `settings.discover_interval` passes.
  class Watch
    include LogStash::Util::Loggable

    attr_accessor :lastwarn_max_files
    attr_reader :discoverer, :watched_files_collection

    # @param discoverer [#add_path, #discover]
    # @param watched_files_collection [#empty?, #values, #delete, #close_all]
    # @param settings [#discover_interval, #stat_interval]
    def initialize(discoverer, watched_files_collection, settings)
      @discoverer = discoverer
      @watched_files_collection = watched_files_collection
      @settings = settings
      @lastwarn_max_files = 0
      # watch and iterate_on_state can be called from different threads.
      @lock = Mutex.new
      # the quit flag is mutated from another thread, so it has its own lock
      @quit_lock = Mutex.new
      @quit = false
    end

    # Registers the processor and gives it a back-reference to this watch.
    # @return [Watch] self, so calls can be chained
    def add_processor(processor)
      @processor = processor
      @processor.add_watch(self)
      self
    end

    # Adds a path to the discoverer under the watch lock.
    # @return [true] always; the discoverer's own result is deliberately hidden
    def watch(path)
      synchronized { @discoverer.add_path(path) }
      true
    end

    # Runs one discovery pass under the watch lock.
    # @return [true] always; the discoverer's own result is deliberately hidden
    def discover
      synchronized { @discoverer.discover }
      true
    end

    # Blocks the calling thread in the process/discover/sleep loop until
    # #quit is called, then closes every file in the collection.
    def subscribe(observer, sincedb_collection)
      @processor.initialize_handlers(sincedb_collection, observer)

      discover_every = @settings.discover_interval
      passes = 0
      reset_quit
      until quit?
        iterate_on_state
        break if quit?
        passes += 1
        if passes == discover_every
          discover
          passes = 0
        end
        break if quit?
        sleep(@settings.stat_interval)
      end
      @watched_files_collection.close_all
    end

    # Read mode processor will handle watched_files in the closed, ignored, watched and active state
    # differently from Tail mode - see the ReadMode::Processor and TailMode::Processor
    #
    # One snapshot of the collection is taken and passed to each processing
    # phase in turn; a quit request between phases stops the pass early.
    # Paths the processor marked as deletable are always removed afterwards.
    def iterate_on_state
      return if @watched_files_collection.empty?
      synchronized do
        begin
          # the snapshot of watched_file values is created just once per pass
          snapshot = @watched_files_collection.values
          [:process_closed, :process_ignored, :process_watched].each do |phase|
            @processor.public_send(phase, snapshot)
            return if quit?
          end
          @processor.process_active(snapshot)
        ensure
          @watched_files_collection.delete(@processor.deletable_filepaths)
          @processor.deletable_filepaths.clear
        end
      end
    end

    # Asks the #subscribe loop to stop at its next check.
    def quit
      @quit_lock.synchronize { @quit = true }
    end

    # @return [Boolean] whether #quit has been requested since the last reset
    def quit?
      @quit_lock.synchronize { @quit }
    end

    private

    # Runs the given block while holding the watch lock.
    def synchronized(&block)
      @lock.synchronize(&block)
    end

    # Clears a previous quit request so #subscribe can be entered again.
    def reset_quit
      @quit_lock.synchronize { @quit = false }
    end
  end
end
@@ -0,0 +1,226 @@
1
+ # encoding: utf-8
2
+
3
module FileWatch
  # Represents a file that has been discovered.
  #
  # Tracks the path, the most recent stat, how many bytes have been read,
  # the open IO handle with its line buffer, and a small state machine
  # (:watched, :active, :ignored, :closed, :unwatched) with a short history.
  class WatchedFile
    include InodeMixin # see bootstrap.rb at `if LogStash::Environment.windows?`

    attr_reader :bytes_read, :state, :file, :buffer, :recent_states
    attr_reader :path, :filestat, :accessed_at, :modified_at, :pathname
    attr_reader :sdb_key_v1, :last_stat_size, :listener
    attr_accessor :last_open_warning_at

    # @param pathname [String, Pathname] location of the discovered file
    # @param stat [#mtime, #size] stat taken at discovery time
    # @param settings [#delimiter, #close_older, #ignore_older]
    def initialize(pathname, stat, settings)
      @settings = settings
      # the given pathname may be a String or a Pathname; normalise to Pathname
      @pathname = Pathname.new(pathname)
      @path = @pathname.to_path
      @bytes_read = 0
      @last_stat_size = 0
      # prepare_inode comes from the mixed-in InodeMixin
      @sdb_key_v1 = InodeStruct.new(*prepare_inode(path, stat))
      # true until this watched_file has been associated with a previous
      # sincedb value; while true we should read from the beginning if necessary
      @initial = true
      # the last 8 states are kept here, see set_state
      @recent_states = []
      @state = :watched
      set_stat(stat) # may change @last_stat_size
      @listener = nil
      @last_open_warning_at = nil
      set_accessed_at
    end

    # --- listener -----------------------------------------------------------

    # Asks the observer for a listener dedicated to this file's path.
    def set_listener(observer)
      @listener = observer.listener_for(@path)
    end

    def unset_listener
      @listener = nil
    end

    def has_listener?
      !@listener.nil?
    end

    # --- identity and bookkeeping -------------------------------------------

    # @return the key under which this file is recorded in the sincedb
    def sincedb_key
      @sdb_key_v1
    end

    # Marks that the initial sincedb association has been done.
    def initial_completed
      @initial = false
    end

    def initial?
      @initial
    end

    # Records "now" (float seconds) as the last access time.
    def set_accessed_at
      @accessed_at = Time.now.to_f
    end

    # @return [Boolean] true when the path has a gzip file extension
    def compressed?
      @path.end_with?('.gz','.gzip')
    end

    # @return [Boolean] true when the last stat size differs from bytes read
    def size_changed?
      !all_read?
    end

    # @return [Boolean] true when bytes read equals the last stat size
    def all_read?
      @last_stat_size == bytes_read
    end

    # @return [Boolean] true when the file is now smaller than what was read
    def shrunk?
      @last_stat_size < @bytes_read
    end

    # @return [Boolean] true when the file holds more than what was read
    def grown?
      @last_stat_size > @bytes_read
    end

    # --- IO handle ----------------------------------------------------------

    # Opens the file at the current path and stores the handle.
    def open
      file_add_opened(FileOpener.open(@path))
    end

    # Stores an already opened handle; the line buffer is created only once.
    def file_add_opened(rubyfile)
      @file = rubyfile
      @buffer = BufferedTokenizer.new(@settings.delimiter) if @buffer.nil?
    end

    # Closes and forgets the handle; does nothing when no handle is open.
    def file_close
      return unless file_open?
      @file.close
      @file = nil
    end

    def file_seek(amount, whence = IO::SEEK_SET)
      @file.sysseek(amount, whence)
    end

    # Reads up to `amount` bytes and refreshes the last access time.
    def file_read(amount)
      set_accessed_at
      @file.sysread(amount)
    end

    def file_open?
      !(@file.nil? || @file.closed?)
    end

    # --- line buffer --------------------------------------------------------

    def reset_buffer
      @buffer.flush
    end

    def buffer_extract(data)
      @buffer.extract(data)
    end

    # --- read position ------------------------------------------------------

    # Adds delta to the bytes-read counter; a nil delta is ignored.
    def increment_bytes_read(delta)
      @bytes_read += delta unless delta.nil?
    end

    # Replaces the bytes-read counter; a nil total is ignored.
    def update_bytes_read(total_bytes_read)
      @bytes_read = total_bytes_read unless total_bytes_read.nil?
    end

    def update_path(_path)
      @path = _path
    end

    def update_stat(st)
      set_stat(st)
    end

    # Re-stats the file through its pathname and stores the result.
    def restat
      set_stat(pathname.stat)
    end

    # --- state transitions --------------------------------------------------

    def activate
      set_state :active
    end

    # Moves to :ignored and treats everything currently in the file as read.
    def ignore
      set_state :ignored
      @bytes_read = @filestat.size
    end

    def close
      set_state :closed
    end

    def watch
      set_state :watched
    end

    def unwatch
      set_state :unwatched
    end

    # Pushes the current state onto the bounded history, then switches state.
    def set_state(value)
      @recent_states.shift if @recent_states.size == 8
      @recent_states.push(@state)
      @state = value
    end

    # @return [Array] the remembered previous states followed by the current one
    def recent_state_history
      @recent_states + Array(@state)
    end

    # --- state predicates ---------------------------------------------------

    def active?
      @state == :active
    end

    def ignored?
      @state == :ignored
    end

    def closed?
      @state == :closed
    end

    def watched?
      @state == :watched
    end

    def unwatched?
      @state == :unwatched
    end

    # --- expiry -------------------------------------------------------------

    def expiry_close_enabled?
      !@settings.close_older.nil?
    end

    def expiry_ignore_enabled?
      !@settings.ignore_older.nil?
    end

    # @return [Boolean] true when the close timeout elapsed and all is read
    def file_closable?
      file_can_close? && all_read?
    end

    # @return [Boolean] true when ignore_older is set and the file was last
    #   modified longer ago than that
    def file_ignorable?
      expiry_ignore_enabled? && seconds_since(@modified_at) > @settings.ignore_older
    end

    # @return [Boolean] true when close_older is set and the file was last
    #   accessed longer ago than that
    def file_can_close?
      expiry_close_enabled? && seconds_since(@accessed_at) > @settings.close_older
    end

    def to_s
      inspect
    end

    private

    # (Time.now - stat.mtime) in jruby does int and float conversions before
    # the subtraction and returns a float, so use all floats upfront.
    def seconds_since(timestamp)
      Time.now.to_f - timestamp
    end

    # Stores the stat together with the modification time (float seconds)
    # and size taken from it.
    def set_stat(stat)
      @modified_at = stat.mtime.to_f
      @last_stat_size = stat.size
      @filestat = stat
    end
  end
end
@@ -0,0 +1,84 @@
1
+ # encoding: utf-8
2
module FileWatch
  # A sorted list of watched files with lookup by path.
  #
  # The files are kept in an array ordered according to the settings
  # (`file_sort_by`: "last_modified" | "path", `file_sort_direction`:
  # "asc" | "desc"). A path => array-index table is rebuilt after every
  # re-sort so that #watched_file_by_path does not need to scan the array.
  class WatchedFilesCollection

    # @param settings [#file_sort_by, #file_sort_direction]
    # @raise [NameError] when the two settings do not name one of the private
    #   sort methods defined below
    def initialize(settings)
      @sort_by = settings.file_sort_by # "last_modified" | "path"
      @sort_direction = settings.file_sort_direction # "asc" | "desc"
      @sort_method = method("#{@sort_by}_#{@sort_direction}".to_sym)
      @files = []
      @pointers = {}
    end

    # Adds a watched file and re-sorts the collection.
    def add(watched_file)
      @files << watched_file
      @sort_method.call
    end

    # Removes the watched files registered under the given path(s).
    #
    # All indexes are resolved before anything is removed and the removal
    # runs from the highest index down, so taking out one element cannot
    # shift the position of another element that is still to be removed.
    # Paths that are not in the collection are skipped.
    #
    # @param paths [String, Array<String>, nil] one path or a list of paths
    def delete(paths)
      indexes = Array(paths).map { |path| @pointers.delete(path) }.compact.uniq
      indexes.sort.reverse_each { |index| @files.delete_at(index) }
      @sort_method.call
    end

    # Calls file_close on every watched file.
    def close_all
      @files.each(&:file_close)
    end

    def empty?
      @files.empty?
    end

    # @return [Array] the paths currently known to the collection
    def keys
      @pointers.keys
    end

    # @return [Array] the watched files in their current sort order
    def values
      @files
    end

    # @return the watched file registered under path, or nil when unknown
    def watched_file_by_path(path)
      index = @pointers[path]
      return nil unless index
      @files[index]
    end

    private

    def last_modified_asc
      @files.sort! do |left, right|
        left.modified_at <=> right.modified_at
      end
      refresh_pointers
    end

    def last_modified_desc
      @files.sort! do |left, right|
        right.modified_at <=> left.modified_at
      end
      refresh_pointers
    end

    def path_asc
      @files.sort! do |left, right|
        left.path <=> right.path
      end
      refresh_pointers
    end

    def path_desc
      @files.sort! do |left, right|
        right.path <=> left.path
      end
      refresh_pointers
    end

    # Rebuilds the path => index table from the current array order.
    # The table is emptied first so that a path which is no longer present
    # cannot keep pointing at an unrelated element.
    def refresh_pointers
      @pointers.clear
      @files.each_with_index do |watched_file, index|
        @pointers[watched_file.path] = index
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+ require "ffi"
3
+
4
# FFI bindings to kernel32 used to derive an identifier for a file on
# Windows from its volume serial number and file index.
module Winhelper
  extend FFI::Library

  ffi_lib 'kernel32'
  ffi_convention :stdcall

  # Win32 FILETIME: two 32-bit halves.
  class FileTime < FFI::Struct
    layout :lowDateTime, :uint,
           :highDateTime, :uint
  end

  # Win32 BY_HANDLE_FILE_INFORMATION
  # http://msdn.microsoft.com/en-us/library/windows/desktop/aa363788(v=vs.85).aspx
  class FileInformation < FFI::Struct
    layout :fileAttributes, :uint,     # DWORD dwFileAttributes;
           :createTime, FileTime,      # FILETIME ftCreationTime;
           :lastAccessTime, FileTime,  # FILETIME ftLastAccessTime;
           :lastWriteTime, FileTime,   # FILETIME ftLastWriteTime;
           :volumeSerialNumber, :uint, # DWORD dwVolumeSerialNumber;
           :fileSizeHigh, :uint,       # DWORD nFileSizeHigh;
           :fileSizeLow, :uint,        # DWORD nFileSizeLow;
           :numberOfLinks, :uint,      # DWORD nNumberOfLinks;
           :fileIndexHigh, :uint,      # DWORD nFileIndexHigh;
           :fileIndexLow, :uint        # DWORD nFileIndexLow;
  end

  # http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
  # HANDLE WINAPI CreateFile(_In_ LPCTSTR lpFileName,_In_ DWORD dwDesiredAccess,_In_ DWORD dwShareMode,
  #   _In_opt_ LPSECURITY_ATTRIBUTES lpSecurityAttributes,_In_ DWORD dwCreationDisposition,
  #   _In_ DWORD dwFlagsAndAttributes,_In_opt_ HANDLE hTemplateFile);
  attach_function :GetOpenFileHandle, :CreateFileA, [:pointer, :uint, :uint, :pointer, :uint, :uint, :pointer], :pointer

  # http://msdn.microsoft.com/en-us/library/windows/desktop/aa364952(v=vs.85).aspx
  # BOOL WINAPI GetFileInformationByHandle(_In_ HANDLE hFile,_Out_ LPBY_HANDLE_FILE_INFORMATION lpFileInformation);
  attach_function :GetFileInformationByHandle, [:pointer, :pointer], :int

  attach_function :CloseHandle, [:pointer], :int

  # Builds an identifier for the file at path.
  #
  # @param path [String] path of the file to identify
  # @return [String] "<volumeSerialNumber>-<fileIndexLow>-<fileIndexHigh>"
  #   when the file information could be read, otherwise the given path
  def self.GetWindowsUniqueFileIdentifier(path)
    # Arguments per the Win32 CreateFile documentation:
    #   0   - no read/write access requested, metadata only
    #   7   - FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
    #   3   - OPEN_EXISTING
    #   128 - FILE_ATTRIBUTE_NORMAL
    handle = GetOpenFileHandle(path, 0, 7, nil, 3, 128, nil)
    file_info = Winhelper::FileInformation.new
    success = GetFileInformationByHandle(handle, file_info)
    CloseHandle(handle)

    # A Win32 BOOL signals success with any nonzero value, not only 1.
    # On failure the file information cannot be retrieved: fall back to the path.
    return path if success == 0

    # this is only guaranteed on NTFS, for ReFS on windows 2012,
    # GetFileInformationByHandleEx should be used with FILE_ID_INFO,
    # which returns a 128 bit identifier
    "#{file_info[:volumeSerialNumber]}-#{file_info[:fileIndexLow]}-#{file_info[:fileIndexHigh]}"
  end
end
61
+
62
+ #fileId = Winhelper.GetWindowsUniqueFileIdentifier('C:\inetpub\logs\LogFiles\W3SVC1\u_ex1fdsadfsadfasdf30612.log')
63
+ #p "FileId: " + fileId
64
+ #p "outside function, sleeping"
65
+ #sleep(10)