logstash-input-file 4.0.5 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -3
  3. data/JAR_VERSION +1 -0
  4. data/docs/index.asciidoc +195 -37
  5. data/lib/filewatch/bootstrap.rb +74 -0
  6. data/lib/filewatch/discoverer.rb +94 -0
  7. data/lib/filewatch/helper.rb +65 -0
  8. data/lib/filewatch/observing_base.rb +97 -0
  9. data/lib/filewatch/observing_read.rb +23 -0
  10. data/lib/filewatch/observing_tail.rb +22 -0
  11. data/lib/filewatch/read_mode/handlers/base.rb +81 -0
  12. data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
  13. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
  14. data/lib/filewatch/read_mode/processor.rb +117 -0
  15. data/lib/filewatch/settings.rb +67 -0
  16. data/lib/filewatch/sincedb_collection.rb +215 -0
  17. data/lib/filewatch/sincedb_record_serializer.rb +70 -0
  18. data/lib/filewatch/sincedb_value.rb +87 -0
  19. data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
  20. data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
  21. data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
  22. data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
  23. data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
  24. data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
  25. data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
  26. data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
  27. data/lib/filewatch/tail_mode/processor.rb +209 -0
  28. data/lib/filewatch/watch.rb +107 -0
  29. data/lib/filewatch/watched_file.rb +226 -0
  30. data/lib/filewatch/watched_files_collection.rb +84 -0
  31. data/lib/filewatch/winhelper.rb +65 -0
  32. data/lib/jars/filewatch-1.0.0.jar +0 -0
  33. data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
  34. data/lib/logstash/inputs/file.rb +162 -107
  35. data/lib/logstash/inputs/file_listener.rb +61 -0
  36. data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
  37. data/logstash-input-file.gemspec +5 -4
  38. data/spec/filewatch/buftok_spec.rb +24 -0
  39. data/spec/filewatch/reading_spec.rb +128 -0
  40. data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
  41. data/spec/filewatch/spec_helper.rb +120 -0
  42. data/spec/filewatch/tailing_spec.rb +440 -0
  43. data/spec/filewatch/watched_file_spec.rb +38 -0
  44. data/spec/filewatch/watched_files_collection_spec.rb +73 -0
  45. data/spec/filewatch/winhelper_spec.rb +22 -0
  46. data/spec/fixtures/compressed.log.gz +0 -0
  47. data/spec/fixtures/compressed.log.gzip +0 -0
  48. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  49. data/spec/fixtures/no-final-newline.log +2 -0
  50. data/spec/fixtures/uncompressed.log +2 -0
  51. data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
  52. data/spec/inputs/file_read_spec.rb +155 -0
  53. data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
  54. metadata +96 -28
@@ -0,0 +1,117 @@
1
+ # encoding: utf-8
2
+ require "logstash/util/loggable"
3
+
4
+ require_relative "handlers/base"
5
+ require_relative "handlers/read_file"
6
+ require_relative "handlers/read_zip_file"
7
+
8
module FileWatch module ReadMode
  # Moves watched files through the read-mode states and hands active ones
  # to the matching handler.
  #
  # Must handle
  #   :read_file
  #   :read_zip_file
  class Processor
    include LogStash::Util::Loggable

    # watch: the object given to add_watch (queried for quit? and the
    #        lastwarn_max_files timestamp).
    # deletable_filepaths: paths of files whose stat failed with ENOENT.
    attr_reader :watch, :deletable_filepaths

    # settings - object responding to max_active and max_warn_msg.
    def initialize(settings)
      @settings = settings
      @deletable_filepaths = []
    end

    # Remembers the watch and returns self so the call can be chained.
    def add_watch(watch)
      @watch = watch
      self
    end

    # Builds one handler per supported action; both receive the same
    # collaborators.
    def initialize_handlers(sincedb_collection, observer)
      @read_file = Handlers::ReadFile.new(sincedb_collection, observer, @settings)
      @read_zip_file = Handlers::ReadZipFile.new(sincedb_collection, observer, @settings)
    end

    # Delegates a plain file to the ReadFile handler.
    def read_file(watched_file)
      @read_file.handle(watched_file)
    end

    # Delegates a compressed file to the ReadZipFile handler.
    def read_zip_file(watched_file)
      @read_zip_file.handle(watched_file)
    end

    def process_closed(watched_files)
      # do not process watched_files in the closed state.
    end

    def process_ignored(watched_files)
      # do not process watched_files in the ignored state.
    end

    # Handles watched_files in the watched state.
    # A slice of them, sized by what is left of the max active window, is
    # restat-ed and moved to the active state. When the window is already
    # full a rate-limited warning is logged instead.
    def process_watched(watched_files)
      logger.debug("Watched processing")
      free_slots = @settings.max_active - watched_files.count(&:active?)
      if free_slots > 0
        watched_files.select(&:watched?).first(free_slots).each do |watched_file|
          activated = stat_guard(watched_file, "Watched") do
            watched_file.restat
            watched_file.activate
          end
          next unless activated
          break if watch.quit?
        end
      else
        now = Time.now.to_i
        since_last_warning = now - watch.lastwarn_max_files
        if since_last_warning > MAX_FILES_WARN_INTERVAL
          waiting = watched_files.size - @settings.max_active
          logger.warn(@settings.max_warn_msg + ", files yet to open: #{waiting}")
          watch.lastwarn_max_files = now
        end
      end
    end

    # Handles watched_files in the active state: each one is restat-ed and
    # then read by the handler matching its compression.
    def process_active(watched_files)
      logger.debug("Active processing")
      watched_files.select(&:active?).each do |watched_file|
        next unless stat_guard(watched_file, "Active") { watched_file.restat }
        break if watch.quit?

        # handlers take care of closing and unwatching
        watched_file.compressed? ? read_zip_file(watched_file) : read_file(watched_file)
      end
    end

    # The file has gone away or we can't read it anymore: stop watching it
    # and record its path in deletable_filepaths.
    def common_deleted_reaction(watched_file, action)
      watched_file.unwatch
      deletable_filepaths << watched_file.path
      logger.debug("#{action} - stat failed: #{watched_file.path}, removing from collection")
    end

    # Logs any other error with its message and the first 8 backtrace frames.
    def common_error_reaction(path, error, action)
      logger.error("#{action} - other error #{path}: (#{error.message}, #{error.backtrace.take(8).inspect})")
    end

    private

    # Runs the given block for watched_file and reports whether it succeeded.
    # Errno::ENOENT goes to common_deleted_reaction, any other StandardError
    # to common_error_reaction; both cases return false.
    # The path is captured before the block runs so it is available for the
    # error log.
    def stat_guard(watched_file, action)
      path = watched_file.path
      begin
        yield
        true
      rescue Errno::ENOENT
        common_deleted_reaction(watched_file, action)
        false
      rescue => e
        common_error_reaction(path, e, action)
        false
      end
    end
  end
end end
@@ -0,0 +1,67 @@
1
+ # encoding: utf-8
2
+
3
module FileWatch
  # Holds the option values used by the FileWatch classes. Caller supplied
  # options are merged over a set of built-in defaults and exposed through
  # read-only accessors.
  class Settings
    attr_reader :delimiter, :close_older, :ignore_older, :delimiter_byte_size
    attr_reader :max_active, :max_warn_msg, :lastwarn_max_files
    attr_reader :sincedb_write_interval, :stat_interval, :discover_interval
    attr_reader :exclude, :start_new_files_at, :file_chunk_count, :file_chunk_size
    attr_reader :sincedb_path, :sincedb_expiry_duration
    attr_reader :file_sort_by, :file_sort_direction

    # Builds a Settings with the defaults and then applies opts on top.
    #
    # opts - Hash of symbol keyed options (see add_options).
    #
    # Returns the new Settings instance.
    def self.from_options(opts)
      new.add_options(opts)
    end

    # Converts a number of days (anything responding to to_f) to seconds,
    # returned as a Float.
    def self.days_to_seconds(days)
      (24 * 3600) * days.to_f
    end

    def initialize
      defaults = {
        :delimiter => "\n",
        :file_chunk_size => FILE_READ_SIZE,
        :max_active => 4095,
        :file_chunk_count => FIXNUM_MAX,
        :sincedb_clean_after => 14,
        :exclude => [],
        :stat_interval => 1,
        :discover_interval => 5,
        :file_sort_by => "last_modified",
        :file_sort_direction => "asc",
      }
      @opts = {}
      @lastwarn_max_files = 0
      add_options(defaults)
    end

    # Merges opts into the options seen so far and refreshes every derived
    # attribute. May be called repeatedly; later values win.
    #
    # opts - Hash of symbol keyed options. The open files limit may be given
    #        as :max_open_files or as :max_active; when both appear in the
    #        accumulated options :max_open_files takes precedence.
    #
    # Returns self.
    def add_options(opts)
      @opts.update(opts)
      # The limit is presented to users as 'max_open_files' (see the warning
      # text built in max_open_files=), so that key must be honoured as well
      # as the internal :max_active name. It is folded into :max_active so a
      # later add_options call using either name overrides it.
      @opts[:max_active] = @opts.delete(:max_open_files) if @opts.key?(:max_open_files)
      self.max_open_files = @opts[:max_active]
      @delimiter = @opts[:delimiter]
      @delimiter_byte_size = @delimiter.bytesize
      @file_chunk_size = @opts[:file_chunk_size]
      @close_older = @opts[:close_older]
      @ignore_older = @opts[:ignore_older]
      @sincedb_write_interval = @opts[:sincedb_write_interval]
      @stat_interval = @opts[:stat_interval]
      @discover_interval = @opts[:discover_interval]
      # Array() lets a single pattern be given without wrapping it in an array
      @exclude = Array(@opts[:exclude])
      @start_new_files_at = @opts[:start_new_files_at]
      @file_chunk_count = @opts[:file_chunk_count]
      @sincedb_path = @opts[:sincedb_path]
      @sincedb_expiry_duration = self.class.days_to_seconds(@opts.fetch(:sincedb_clean_after, 14))
      @file_sort_by = @opts[:file_sort_by]
      @file_sort_direction = @opts[:file_sort_direction]
      self
    end

    # Sets the maximum number of simultaneously active files and rebuilds
    # the matching warning message.
    #
    # value - anything responding to to_i, or nil. nil, zero and negative
    #         values fall back to 4095.
    def max_open_files=(value)
      val = value.to_i
      val = 4095 if value.nil? || val <= 0
      @max_warn_msg = "Reached open files limit: #{val}, set by the 'max_open_files' option or default"
      @max_active = val
    end
  end
end
@@ -0,0 +1,215 @@
1
+ # encoding: utf-8
2
+ require "logstash/util/loggable"
3
+
4
module FileWatch
  # this KV collection has a watched_file storage_key (an InodeStruct) as the key
  # and a SincedbValue as the value.
  # the SincedbValues are built by reading the sincedb file.
  class SincedbCollection
    include LogStash::Util::Loggable

    # path: Pathname of the sincedb file.
    attr_reader :path
    # serializer: object used to read and write sincedb records; it must
    # respond to serialize, deserialize and expired_keys.
    attr_writer :serializer

    # settings - object responding to sincedb_expiry_duration, sincedb_path
    #            and sincedb_write_interval.
    #
    # Touches the sincedb file so that it exists after construction.
    def initialize(settings)
      @settings = settings
      @sincedb_last_write = 0
      @sincedb = {}
      @serializer = SincedbRecordSerializer.new(@settings.sincedb_expiry_duration)
      @path = Pathname.new(@settings.sincedb_path)
      # on Windows, or when the target is a character/block device, the file
      # is written in place; otherwise it is written atomically
      write_in_place = LogStash::Environment.windows? || @path.chardev? || @path.blockdev?
      @write_method = write_in_place ? method(:non_atomic_write) : method(:atomic_write)
      @full_path = @path.to_path
      FileUtils.touch(@full_path)
    end

    # Writes the sincedb to disk, but only when at least
    # sincedb_write_interval seconds have passed since the last write.
    def request_disk_flush
      now = Time.now.to_i
      delta = now - @sincedb_last_write
      if delta >= @settings.sincedb_write_interval
        logger.debug("writing sincedb (delta since last write = #{delta})")
        sincedb_write(now)
      end
    end

    # Writes the sincedb to disk unconditionally.
    #
    # reason - optional text, used only in the debug log line.
    def write(reason=nil)
      logger.debug("caller requested sincedb write (#{reason})")
      sincedb_write
    end

    # Loads the sincedb file into memory, skipping expired records.
    # Any StandardError raised while reading is logged at debug level and
    # otherwise ignored, so the collection keeps whatever was loaded so far.
    def open
      @time_sdb_opened = Time.now.to_f
      begin
        path.open do |file|
          logger.debug("open: reading from #{path}")
          @serializer.deserialize(file) do |key, value|
            # the serializer yields an empty array for an empty record;
            # skip it rather than fail and abandon the rest of the file
            next if key.nil? || value.nil?
            logger.debug("open: importing ... '#{key}' => '#{value}'")
            set_key_value(key, value)
          end
        end
        logger.debug("open: count of keys read: #{@sincedb.keys.size}")
      rescue => e
        #No existing sincedb to load
        logger.debug("open: error: #{path}: #{e.inspect}")
      end
    end

    # Links a discovered watched_file with the sincedb value stored under
    # its sincedb_key, or removes a value that belongs to a different file.
    def associate(watched_file)
      logger.debug("associate: finding: #{watched_file.path}")
      sincedb_value = find(watched_file)
      if sincedb_value.nil?
        # sincedb has no record of this inode
        # and due to the window handling of many files
        # this file may not be opened in this session.
        # a new value will be added when the file is opened
        return
      end
      if sincedb_value.watched_file.nil?
        # not associated
        if sincedb_value.path_in_sincedb.nil?
          # old v1 record, assume its the same file
          handle_association(sincedb_value, watched_file)
          return
        end
        if sincedb_value.path_in_sincedb == watched_file.path
          # the path on disk is the same as discovered path
          # and the inode is the same.
          handle_association(sincedb_value, watched_file)
          return
        end
        # the path on disk is different from discovered unassociated path
        # but they have the same key (inode)
        # treat as a new file, a new value will be added when the file is opened
        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
        sincedb_value.clear_watched_file
        delete(watched_file.sincedb_key)
        return
      end
      if sincedb_value.watched_file.equal?(watched_file) # pointer equals
        logger.debug("associate: already associated - #{sincedb_value}, for path: #{watched_file.path}")
        return
      end
      # sincedb_value.watched_file is not the discovered watched_file but they have the same key (inode)
      # this means that the filename was changed during this session.
      # logout the history of the old sincedb_value and remove it
      # a new value will be added when the file is opened
      # TODO notify about done-ness of old sincedb_value and watched_file
      old_watched_file = sincedb_value.watched_file
      sincedb_value.clear_watched_file
      if logger.debug?
        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
        logger.debug("associate: matched but allocated to another - old watched_file history - #{old_watched_file.recent_state_history.join(', ')}")
        logger.debug("associate: matched but allocated to another - DELETING value at key `#{old_watched_file.sincedb_key}`")
      end
      delete(old_watched_file.sincedb_key)
    end

    # Returns the value stored under watched_file.sincedb_key, or nil.
    def find(watched_file)
      get(watched_file.sincedb_key).tap do |obj|
        logger.debug("find for path: #{watched_file.path}, found: '#{!obj.nil?}'")
      end
    end

    def member?(key)
      @sincedb.member?(key)
    end

    def get(key)
      @sincedb[key]
    end

    def delete(key)
      @sincedb.delete(key)
    end

    # The methods below, up to set_watched_file, expect key to be present
    # in the collection; a missing key results in a call on nil.

    def last_read(key)
      @sincedb[key].position
    end

    def rewind(key)
      @sincedb[key].update_position(0)
    end

    def store_last_read(key, last_read)
      @sincedb[key].update_position(last_read)
    end

    def increment(key, amount)
      @sincedb[key].increment_position(amount)
    end

    def set_watched_file(key, watched_file)
      @sincedb[key].set_watched_file(watched_file)
    end

    # Detaches watched_file from its stored value; does nothing when its
    # sincedb_key is not in the collection.
    def unset_watched_file(watched_file)
      return unless member?(watched_file.sincedb_key)
      get(watched_file.sincedb_key).unset_watched_file
    end

    def clear
      @sincedb.clear
    end

    def keys
      @sincedb.keys
    end

    # Stores value under key and returns value.
    def set(key, value)
      @sincedb[key] = value
      value
    end

    # True when key is present and its value has no watched_file attached.
    def watched_file_unset?(key)
      return false unless member?(key)
      get(key).watched_file.nil?
    end

    private

    # Copies the stored position to the watched_file, attaches it to the
    # value and puts the file in the ignored state when it was fully read.
    def handle_association(sincedb_value, watched_file)
      watched_file.update_bytes_read(sincedb_value.position)
      sincedb_value.set_watched_file(watched_file)
      watched_file.initial_completed
      watched_file.ignore if watched_file.all_read?
    end

    # Stores a record read from disk unless it expired before the file was
    # opened.
    def set_key_value(key, value)
      if @time_sdb_opened < value.last_changed_at_expires(@settings.sincedb_expiry_duration)
        logger.debug("open: setting #{key.inspect} to #{value.inspect}")
        set(key, value)
      else
        logger.debug("open: record has expired, skipping: #{key.inspect} #{value.inspect}")
      end
    end

    # Writes the collection with the configured write method and then drops
    # the keys the serializer reported as expired.
    #
    # time - Integer timestamp recorded as the time of this write.
    def sincedb_write(time = Time.now.to_i)
      logger.debug("sincedb_write: to: #{path}")
      begin
        @write_method.call
        @serializer.expired_keys.each do |key|
          @sincedb[key].unset_watched_file
          delete(key)
          logger.debug("sincedb_write: cleaned", "key" => "'#{key}'")
        end
        @sincedb_last_write = time
      rescue Errno::EACCES => e
        # no file handles free perhaps
        # maybe it will work next time
        logger.debug("sincedb_write: error: #{path}: #{e}")
      end
    end

    def atomic_write
      FileHelper.write_atomically(@full_path) do |io|
        @serializer.serialize(@sincedb, io)
      end
    end

    # Writes the collection directly to the sincedb path.
    # File.open is used because IO.open expects an integer file descriptor,
    # not a path; "w" truncates any existing content before writing.
    def non_atomic_write
      File.open(@full_path, "w") do |io|
        @serializer.serialize(@sincedb, io)
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # encoding: utf-8
2
+
3
module FileWatch
  # Converts between the in-memory sincedb (key => value) and the one record
  # per line text format of the sincedb file.
  class SincedbRecordSerializer
    # Number of whitespace separated fields in a full v2 record:
    # inode, major, minor, position, timestamp, path.
    V2_FIELD_COUNT = 6

    # keys that were found to be expired during the last serialize call
    attr_reader :expired_keys

    # sincedb_value_expiry - duration handed to each value's
    #                        last_changed_at_expires during serialize.
    def initialize(sincedb_value_expiry)
      @sincedb_value_expiry = sincedb_value_expiry
      @expired_keys = []
    end

    def update_sincedb_value_expiry_from_days(days)
      @sincedb_value_expiry = Settings.days_to_seconds(days)
    end

    # Writes every non-expired record of db to io and collects the keys of
    # the expired ones in expired_keys (which is reset on each call).
    #
    # db    - Hash-like object yielding key, value pairs from each.
    # io    - object responding to write.
    # as_of - Float timestamp that records are compared against.
    def serialize(db, io, as_of = Time.now.to_f)
      @expired_keys.clear
      db.each do |key, value|
        if as_of > value.last_changed_at_expires(@sincedb_value_expiry)
          @expired_keys << key
          next
        end
        io.write(serialize_record(key, value))
      end
    end

    # Yields a [key, value] pair for each record read from io.
    # Records that hold no data (blank lines) are not yielded.
    def deserialize(io)
      io.each do |record|
        pair = deserialize_record(record)
        next if pair.empty?
        yield pair
      end
    end

    def serialize_record(k, v)
      # effectively InodeStruct#to_s SincedbValue#to_s
      "#{k} #{v}\n"
    end

    # Parses one line of the sincedb file.
    #
    # record - String line, with or without its line terminator, or nil.
    #
    # Returns [InodeStruct, SincedbValue], or [] when the record is nil,
    # empty or only whitespace.
    def deserialize_record(record)
      return [] if record.nil? || record.empty?
      # The split is limited so that everything after the fifth field stays
      # in one piece: the path is the last field and may contain spaces.
      parts = record.strip.split(" ", V2_FIELD_COUNT)
      return [] if parts.empty?
      parse_line_v2(parts) || parse_line_v1(parts)
    end

    private

    def parse_line_v2(parts)
      # new format e.g. 2977152 1 4 94 1519319662.852678 'path/to/file'
      # do we want to store the last known state of the watched file too?
      return false if parts.size < 5
      inode_struct = prepare_inode_struct(parts)
      pos = parts.shift.to_i
      expires_at = Float(parts.shift) # this is like Time.now.to_f
      path_in_sincedb = parts.shift # nil when the record carries no path
      value = SincedbValue.new(pos, expires_at).add_path_in_sincedb(path_in_sincedb)
      [inode_struct, value]
    end

    def parse_line_v1(parts)
      # old inode based e.g. 2977152 1 4 94
      inode_struct = prepare_inode_struct(parts)
      pos = parts.shift.to_i
      [inode_struct, SincedbValue.new(pos)]
    end

    # Consumes the first three parts: the inode is kept as a String, the
    # two that follow are converted to Integers.
    def prepare_inode_struct(parts)
      InodeStruct.new(parts.shift, *parts.shift(2).map(&:to_i))
    end
  end
end