logstash-input-file 4.0.5 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -3
  3. data/JAR_VERSION +1 -0
  4. data/docs/index.asciidoc +195 -37
  5. data/lib/filewatch/bootstrap.rb +74 -0
  6. data/lib/filewatch/discoverer.rb +94 -0
  7. data/lib/filewatch/helper.rb +65 -0
  8. data/lib/filewatch/observing_base.rb +97 -0
  9. data/lib/filewatch/observing_read.rb +23 -0
  10. data/lib/filewatch/observing_tail.rb +22 -0
  11. data/lib/filewatch/read_mode/handlers/base.rb +81 -0
  12. data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
  13. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
  14. data/lib/filewatch/read_mode/processor.rb +117 -0
  15. data/lib/filewatch/settings.rb +67 -0
  16. data/lib/filewatch/sincedb_collection.rb +215 -0
  17. data/lib/filewatch/sincedb_record_serializer.rb +70 -0
  18. data/lib/filewatch/sincedb_value.rb +87 -0
  19. data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
  20. data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
  21. data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
  22. data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
  23. data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
  24. data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
  25. data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
  26. data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
  27. data/lib/filewatch/tail_mode/processor.rb +209 -0
  28. data/lib/filewatch/watch.rb +107 -0
  29. data/lib/filewatch/watched_file.rb +226 -0
  30. data/lib/filewatch/watched_files_collection.rb +84 -0
  31. data/lib/filewatch/winhelper.rb +65 -0
  32. data/lib/jars/filewatch-1.0.0.jar +0 -0
  33. data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
  34. data/lib/logstash/inputs/file.rb +162 -107
  35. data/lib/logstash/inputs/file_listener.rb +61 -0
  36. data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
  37. data/logstash-input-file.gemspec +5 -4
  38. data/spec/filewatch/buftok_spec.rb +24 -0
  39. data/spec/filewatch/reading_spec.rb +128 -0
  40. data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
  41. data/spec/filewatch/spec_helper.rb +120 -0
  42. data/spec/filewatch/tailing_spec.rb +440 -0
  43. data/spec/filewatch/watched_file_spec.rb +38 -0
  44. data/spec/filewatch/watched_files_collection_spec.rb +73 -0
  45. data/spec/filewatch/winhelper_spec.rb +22 -0
  46. data/spec/fixtures/compressed.log.gz +0 -0
  47. data/spec/fixtures/compressed.log.gzip +0 -0
  48. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  49. data/spec/fixtures/no-final-newline.log +2 -0
  50. data/spec/fixtures/uncompressed.log +2 -0
  51. data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
  52. data/spec/inputs/file_read_spec.rb +155 -0
  53. data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
  54. metadata +96 -28
@@ -0,0 +1,107 @@
1
+ # encoding: utf-8
2
+ require "logstash/util/loggable"
3
+
4
+ module FileWatch
5
# Drives the watch loop: periodically asks the discoverer for new files and
# hands the current set of watched files to the processor, state by state.
class Watch
  include LogStash::Util::Loggable

  attr_accessor :lastwarn_max_files
  attr_reader :discoverer, :watched_files_collection

  # discoverer               - object responding to add_path(path) and discover
  # watched_files_collection - collection responding to empty?, values,
  #                            delete(paths) and close_all
  # settings                 - object responding to discover_interval and stat_interval
  def initialize(discoverer, watched_files_collection, settings)
    @settings = settings
    # watch and iterate_on_state can be called from different threads.
    @lock = Mutex.new
    # we need to be threadsafe about the quit mutation
    @quit = false
    @quit_lock = Mutex.new
    @lastwarn_max_files = 0
    @discoverer = discoverer
    @watched_files_collection = watched_files_collection
  end

  # Attaches the processor that handles each file state and gives it a
  # back-reference to this watch. Returns self so calls can be chained.
  def add_processor(processor)
    @processor = processor
    @processor.add_watch(self)
    self
  end

  # Registers a path (or glob) with the discoverer, under the state lock.
  def watch(path)
    synchronized do
      @discoverer.add_path(path)
    end
    # don't return whatever @discoverer.add_path returns
    return true
  end

  # Runs one discovery pass, under the state lock.
  def discover
    synchronized do
      @discoverer.discover
    end
    # don't return whatever @discoverer.discover returns
    return true
  end

  # Main loop. Initializes the processor's handlers, then until quit is
  # requested: processes every watched file, runs discovery once every
  # `discover_interval` iterations, and sleeps `stat_interval` seconds.
  # All files are closed when the loop ends normally.
  def subscribe(observer, sincedb_collection)
    @processor.initialize_handlers(sincedb_collection, observer)

    glob = 0
    interval = @settings.discover_interval
    reset_quit
    until quit?
      iterate_on_state
      break if quit?
      glob += 1
      if discovery_due?(glob, interval)
        discover
        glob = 0
      end
      break if quit?
      sleep(@settings.stat_interval)
    end
    @watched_files_collection.close_all
  end # def subscribe

  # Read mode processor will handle watched_files in the closed, ignored, watched and active state
  # differently from Tail mode - see the ReadMode::Processor and TailMode::Processor
  def iterate_on_state
    return if @watched_files_collection.empty?
    synchronized do
      begin
        # creates this snapshot of watched_file values just once
        watched_files = @watched_files_collection.values
        @processor.process_closed(watched_files)
        return if quit?
        @processor.process_ignored(watched_files)
        return if quit?
        @processor.process_watched(watched_files)
        return if quit?
        @processor.process_active(watched_files)
      ensure
        # runs on the early returns above too, so paths the processor has
        # marked as deletable are always dropped from the collection
        @watched_files_collection.delete(@processor.deletable_filepaths)
        @processor.deletable_filepaths.clear
      end
    end
  end # def iterate_on_state

  # Asks the subscribe loop to stop at its next quit check.
  def quit
    @quit_lock.synchronize do
      @quit = true
    end
  end # def quit

  def quit?
    @quit_lock.synchronize { @quit }
  end

  private

  def synchronized(&block)
    @lock.synchronize { block.call }
  end

  def reset_quit
    @quit_lock.synchronize { @quit = false }
  end

  # True when enough loop iterations have elapsed to run discovery again.
  # Uses >= for numeric intervals: the counter is an integer stepped by 1, so
  # a strict == would never match a fractional or zero interval and discovery
  # would silently stop. Non-numeric intervals keep the plain equality check.
  def discovery_due?(iterations, interval)
    return iterations == interval unless interval.is_a?(Numeric)
    iterations >= interval
  end
end
107
+ end
@@ -0,0 +1,226 @@
1
+ # encoding: utf-8
2
+
3
+ module FileWatch
4
# A file that has been discovered, together with its read progress,
# most recent stat, lifecycle state and (optionally) its open IO handle.
class WatchedFile
  include InodeMixin # see bootstrap.rb at `if LogStash::Environment.windows?`

  attr_reader :path, :pathname, :filestat, :file, :buffer, :listener
  attr_reader :state, :recent_states, :bytes_read, :last_stat_size
  attr_reader :accessed_at, :modified_at, :sdb_key_v1
  attr_accessor :last_open_warning_at

  # pathname - String or Pathname of the discovered file
  # stat     - stat object for that file (mtime and size are read from it)
  # settings - settings object (delimiter, close_older, ignore_older are read)
  def initialize(pathname, stat, settings)
    @settings = settings
    # accept either form; Pathname.new copes with both
    @pathname = Pathname.new(pathname)
    @path = @pathname.to_path
    @bytes_read = 0
    @last_stat_size = 0
    # prepare_inode is provided by the InodeMixin included above
    @sdb_key_v1 = InodeStruct.new(*prepare_inode(path, stat))
    # true until this watched_file has been associated with a previous
    # sincedb value; while true we should read from the beginning if necessary
    @initial = true
    # holds the last 8 states, maintained by set_state
    @recent_states = []
    @state = :watched
    # may overwrite @last_stat_size
    set_stat(stat)
    @listener = nil
    @last_open_warning_at = nil
    set_accessed_at
  end

  # --- listener -----------------------------------------------------------

  def set_listener(observer)
    @listener = observer.listener_for(@path)
  end

  def unset_listener
    @listener = nil
  end

  def has_listener?
    !@listener.nil?
  end

  # --- identity and bookkeeping -------------------------------------------

  def sincedb_key
    @sdb_key_v1
  end

  def initial?
    @initial
  end

  def initial_completed
    @initial = false
  end

  # Records "now" (float seconds) as the last time this file was accessed.
  def set_accessed_at
    @accessed_at = Time.now.to_f
  end

  def compressed?
    @path.end_with?('.gz','.gzip')
  end

  def update_path(_path)
    @path = _path
  end

  def update_stat(st)
    set_stat(st)
  end

  # Re-stats the file through its Pathname and stores the fresh result.
  def restat
    set_stat(pathname.stat)
  end

  def to_s
    inspect
  end

  # --- size / progress ----------------------------------------------------

  def all_read?
    @last_stat_size == bytes_read
  end

  def size_changed?
    @last_stat_size != bytes_read
  end

  def shrunk?
    @last_stat_size < @bytes_read
  end

  def grown?
    @last_stat_size > @bytes_read
  end

  # Adds delta to the running byte count; a nil delta is ignored.
  def increment_bytes_read(delta)
    @bytes_read += delta unless delta.nil?
  end

  # Overwrites the running byte count; a nil total is ignored.
  def update_bytes_read(total_bytes_read)
    @bytes_read = total_bytes_read unless total_bytes_read.nil?
  end

  # --- underlying IO ------------------------------------------------------

  def open
    file_add_opened(FileOpener.open(@path))
  end

  # Stores the opened IO; the tokenizer is created once and reused after that.
  def file_add_opened(rubyfile)
    @file = rubyfile
    @buffer = BufferedTokenizer.new(@settings.delimiter) unless @buffer
  end

  def file_open?
    !@file.nil? && !@file.closed?
  end

  # Closes and forgets the IO; a missing or already closed IO is left alone.
  def file_close
    return unless file_open?
    @file.close
    @file = nil
  end

  def file_seek(amount, whence = IO::SEEK_SET)
    @file.sysseek(amount, whence)
  end

  # Reads up to amount bytes and refreshes the accessed-at timestamp first.
  def file_read(amount)
    set_accessed_at
    @file.sysread(amount)
  end

  def reset_buffer
    @buffer.flush
  end

  def buffer_extract(data)
    @buffer.extract(data)
  end

  # --- state machine ------------------------------------------------------

  # Pushes the current state onto the bounded history, then switches state.
  def set_state(value)
    @recent_states.shift if @recent_states.size == 8
    @recent_states.push(@state)
    @state = value
  end

  # History plus the current state, as a new array.
  def recent_state_history
    @recent_states + Array(@state)
  end

  def activate
    set_state :active
  end

  # Marks the file ignored and treats its current content as already read.
  def ignore
    set_state :ignored
    @bytes_read = @filestat.size
  end

  def close
    set_state :closed
  end

  def watch
    set_state :watched
  end

  def unwatch
    set_state :unwatched
  end

  def active?
    @state == :active
  end

  def ignored?
    @state == :ignored
  end

  def closed?
    @state == :closed
  end

  def watched?
    @state == :watched
  end

  def unwatched?
    @state == :unwatched
  end

  # --- expiry -------------------------------------------------------------

  def expiry_close_enabled?
    !@settings.close_older.nil?
  end

  def expiry_ignore_enabled?
    !@settings.ignore_older.nil?
  end

  def file_closable?
    file_can_close? && all_read?
  end

  # True when ignore_older is set and the file was last modified longer ago
  # than that.
  def file_ignorable?
    # (Time.now - stat.mtime) <- in jruby, this does int and float
    # conversions before the subtraction and returns a float.
    # so use all floats upfront
    expiry_ignore_enabled? && (Time.now.to_f - @modified_at) > @settings.ignore_older
  end

  # True when close_older is set and the file was last accessed longer ago
  # than that.
  def file_can_close?
    expiry_close_enabled? && (Time.now.to_f - @accessed_at) > @settings.close_older
  end

  private

  # Caches the stat object plus the two values derived from it.
  def set_stat(stat)
    @modified_at = stat.mtime.to_f
    @last_stat_size = stat.size
    @filestat = stat
  end
end
226
+ end
@@ -0,0 +1,84 @@
1
+ # encoding: utf-8
2
+ module FileWatch
3
# An ordered collection of watched files with lookup by path.
#
# @files holds the watched files in the configured sort order; @pointers maps
# each file's path to its current index in @files. The pointers are refreshed
# after every sort, i.e. after every add and delete.
class WatchedFilesCollection

  # settings must respond to:
  #   file_sort_by        - "last_modified" | "path"
  #   file_sort_direction - "asc" | "desc"
  # Any other combination raises NameError here, because the sort method is
  # resolved by name.
  def initialize(settings)
    @sort_by = settings.file_sort_by # "last_modified" | "path"
    @sort_direction = settings.file_sort_direction # "asc" | "desc"
    @sort_method = method("#{@sort_by}_#{@sort_direction}".to_sym)
    @files = []
    @pointers = {}
  end

  # Appends a watched file and re-sorts the collection.
  def add(watched_file)
    @files << watched_file
    @sort_method.call
  end

  # Removes the watched files registered under the given path or paths.
  # Paths that are not in the collection are skipped.
  def delete(paths)
    # Resolve every index before touching @files: each delete_at shifts the
    # entries behind it, so indexes looked up afterwards would be stale.
    indexes = Array(paths).map { |path| @pointers.delete(path) }.compact
    # Highest index first, so the lower ones still point at the right file.
    indexes.sort.reverse_each { |index| @files.delete_at(index) }
    @sort_method.call
  end

  # Closes the file handle of every watched file.
  def close_all
    @files.each(&:file_close)
  end

  def empty?
    @files.empty?
  end

  # Paths currently registered in the collection.
  def keys
    @pointers.keys
  end

  # The watched files in sort order. This is the internal array, not a copy.
  def values
    @files
  end

  # Returns the watched file registered under path, or nil when unknown.
  def watched_file_by_path(path)
    index = @pointers[path]
    return nil unless index
    @files[index]
  end

  private

  def last_modified_asc
    @files.sort! do |left, right|
      left.modified_at <=> right.modified_at
    end
    refresh_pointers
  end

  def last_modified_desc
    @files.sort! do |left, right|
      right.modified_at <=> left.modified_at
    end
    refresh_pointers
  end

  def path_asc
    @files.sort! do |left, right|
      left.path <=> right.path
    end
    refresh_pointers
  end

  def path_desc
    @files.sort! do |left, right|
      right.path <=> left.path
    end
    refresh_pointers
  end

  # Re-records the index of every watched file under its current path.
  def refresh_pointers
    @files.each_with_index do |watched_file, index|
      @pointers[watched_file.path] = index
    end
  end
end
84
+ end
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+ require "ffi"
3
+
4
# FFI bindings to kernel32 used to derive a stable identifier for a file on
# Windows.
module Winhelper
  extend FFI::Library

  ffi_lib 'kernel32'
  ffi_convention :stdcall

  # Two 32-bit halves of a timestamp, used for the FILETIME fields below.
  class FileTime < FFI::Struct
    layout :lowDateTime, :uint,
      :highDateTime, :uint
  end

  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa363788(v=vs.85).aspx
  # Field order matters: it must match the native struct layout.
  class FileInformation < FFI::Struct
    layout :fileAttributes, :uint, #DWORD dwFileAttributes;
      :createTime, FileTime, #FILETIME ftCreationTime;
      :lastAccessTime, FileTime, #FILETIME ftLastAccessTime;
      :lastWriteTime, FileTime, #FILETIME ftLastWriteTime;
      :volumeSerialNumber, :uint, #DWORD dwVolumeSerialNumber;
      :fileSizeHigh, :uint, #DWORD nFileSizeHigh;
      :fileSizeLow, :uint, #DWORD nFileSizeLow;
      :numberOfLinks, :uint, #DWORD nNumberOfLinks;
      :fileIndexHigh, :uint, #DWORD nFileIndexHigh;
      :fileIndexLow, :uint #DWORD nFileIndexLow;
  end

  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
  #HANDLE WINAPI CreateFile(_In_ LPCTSTR lpFileName,_In_ DWORD dwDesiredAccess,_In_ DWORD dwShareMode,
  # _In_opt_ LPSECURITY_ATTRIBUTES lpSecurityAttributes,_In_ DWORD dwCreationDisposition,
  # _In_ DWORD dwFlagsAndAttributes,_In_opt_ HANDLE hTemplateFile);
  attach_function :GetOpenFileHandle, :CreateFileA, [:pointer, :uint, :uint, :pointer, :uint, :uint, :pointer], :pointer

  #http://msdn.microsoft.com/en-us/library/windows/desktop/aa364952(v=vs.85).aspx
  #BOOL WINAPI GetFileInformationByHandle(_In_ HANDLE hFile,_Out_ LPBY_HANDLE_FILE_INFORMATION lpFileInformation);
  attach_function :GetFileInformationByHandle, [:pointer, :pointer], :int

  attach_function :CloseHandle, [:pointer], :int

  # Returns "<volumeSerialNumber>-<fileIndexLow>-<fileIndexHigh>" for the file
  # at path, or path itself when the file information cannot be retrieved.
  def self.GetWindowsUniqueFileIdentifier(path)
    # CreateFile arguments, in the order of the signature quoted above:
    # access 0, share mode 7, no security attributes, creation disposition 3,
    # flags/attributes 128, no template file.
    # NOTE(review): per the Win32 docs these look like "no access requested",
    # FILE_SHARE_READ|WRITE|DELETE, OPEN_EXISTING and FILE_ATTRIBUTE_NORMAL - confirm.
    handle = GetOpenFileHandle(path, 0, 7, nil, 3, 128, nil)
    info = Winhelper::FileInformation.new
    retrieved = GetFileInformationByHandle(handle, info) == 1
    CloseHandle(handle)

    # could not retrieve the file information: fall back to the path
    return path unless retrieved

    #this is only guaranteed on NTFS, for ReFS on windows 2012, GetFileInformationByHandleEx should be used with FILE_ID_INFO, which returns a 128 bit identifier
    "#{info[:volumeSerialNumber]}-#{info[:fileIndexLow]}-#{info[:fileIndexHigh]}"
  end
end
61
+
62
+ #fileId = Winhelper.GetWindowsUniqueFileIdentifier('C:\inetpub\logs\LogFiles\W3SVC1\u_ex1fdsadfsadfasdf30612.log')
63
+ #p "FileId: " + fileId
64
+ #p "outside function, sleeping"
65
+ #sleep(10)