logstash-input-file 4.0.5 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -3
  3. data/JAR_VERSION +1 -0
  4. data/docs/index.asciidoc +195 -37
  5. data/lib/filewatch/bootstrap.rb +74 -0
  6. data/lib/filewatch/discoverer.rb +94 -0
  7. data/lib/filewatch/helper.rb +65 -0
  8. data/lib/filewatch/observing_base.rb +97 -0
  9. data/lib/filewatch/observing_read.rb +23 -0
  10. data/lib/filewatch/observing_tail.rb +22 -0
  11. data/lib/filewatch/read_mode/handlers/base.rb +81 -0
  12. data/lib/filewatch/read_mode/handlers/read_file.rb +47 -0
  13. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +57 -0
  14. data/lib/filewatch/read_mode/processor.rb +117 -0
  15. data/lib/filewatch/settings.rb +67 -0
  16. data/lib/filewatch/sincedb_collection.rb +215 -0
  17. data/lib/filewatch/sincedb_record_serializer.rb +70 -0
  18. data/lib/filewatch/sincedb_value.rb +87 -0
  19. data/lib/filewatch/tail_mode/handlers/base.rb +124 -0
  20. data/lib/filewatch/tail_mode/handlers/create.rb +17 -0
  21. data/lib/filewatch/tail_mode/handlers/create_initial.rb +21 -0
  22. data/lib/filewatch/tail_mode/handlers/delete.rb +11 -0
  23. data/lib/filewatch/tail_mode/handlers/grow.rb +11 -0
  24. data/lib/filewatch/tail_mode/handlers/shrink.rb +20 -0
  25. data/lib/filewatch/tail_mode/handlers/timeout.rb +10 -0
  26. data/lib/filewatch/tail_mode/handlers/unignore.rb +37 -0
  27. data/lib/filewatch/tail_mode/processor.rb +209 -0
  28. data/lib/filewatch/watch.rb +107 -0
  29. data/lib/filewatch/watched_file.rb +226 -0
  30. data/lib/filewatch/watched_files_collection.rb +84 -0
  31. data/lib/filewatch/winhelper.rb +65 -0
  32. data/lib/jars/filewatch-1.0.0.jar +0 -0
  33. data/lib/logstash/inputs/delete_completed_file_handler.rb +9 -0
  34. data/lib/logstash/inputs/file.rb +162 -107
  35. data/lib/logstash/inputs/file_listener.rb +61 -0
  36. data/lib/logstash/inputs/log_completed_file_handler.rb +13 -0
  37. data/logstash-input-file.gemspec +5 -4
  38. data/spec/filewatch/buftok_spec.rb +24 -0
  39. data/spec/filewatch/reading_spec.rb +128 -0
  40. data/spec/filewatch/sincedb_record_serializer_spec.rb +71 -0
  41. data/spec/filewatch/spec_helper.rb +120 -0
  42. data/spec/filewatch/tailing_spec.rb +440 -0
  43. data/spec/filewatch/watched_file_spec.rb +38 -0
  44. data/spec/filewatch/watched_files_collection_spec.rb +73 -0
  45. data/spec/filewatch/winhelper_spec.rb +22 -0
  46. data/spec/fixtures/compressed.log.gz +0 -0
  47. data/spec/fixtures/compressed.log.gzip +0 -0
  48. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  49. data/spec/fixtures/no-final-newline.log +2 -0
  50. data/spec/fixtures/uncompressed.log +2 -0
  51. data/spec/{spec_helper.rb → helpers/spec_helper.rb} +14 -41
  52. data/spec/inputs/file_read_spec.rb +155 -0
  53. data/spec/inputs/{file_spec.rb → file_tail_spec.rb} +55 -52
  54. metadata +96 -28
@@ -0,0 +1,117 @@
1
# encoding: utf-8
require "logstash/util/loggable"

require_relative "handlers/base"
require_relative "handlers/read_file"
require_relative "handlers/read_zip_file"

module FileWatch module ReadMode
  # Drives read-mode processing: promotes discovered files into the active
  # window and hands each active file to the appropriate handler.
  # Must handle
  #   :read_file
  #   :read_zip_file
  class Processor
    include LogStash::Util::Loggable

    attr_reader :watch, :deletable_filepaths

    # settings - the shared FileWatch::Settings instance.
    def initialize(settings)
      @settings = settings
      @deletable_filepaths = []
    end

    # Wires in the Watch instance; returns self so calls can be chained.
    def add_watch(watch)
      @watch = watch
      self
    end

    # Builds both handlers; must run before any process_* call.
    def initialize_handlers(sincedb_collection, observer)
      @read_file = Handlers::ReadFile.new(sincedb_collection, observer, @settings)
      @read_zip_file = Handlers::ReadZipFile.new(sincedb_collection, observer, @settings)
    end

    # Delegates a plain file to the ReadFile handler.
    def read_file(watched_file)
      @read_file.handle(watched_file)
    end

    # Delegates a gzip file to the ReadZipFile handler.
    def read_zip_file(watched_file)
      @read_zip_file.handle(watched_file)
    end

    def process_closed(watched_files)
      # do not process watched_files in the closed state.
    end

    def process_ignored(watched_files)
      # do not process watched_files in the ignored state.
    end

    # Moves a slice of files from the watched state into the active state,
    # bounded by how much of the max_active window is still free.
    # Files being activated should never have been active before.
    def process_watched(watched_files)
      logger.debug("Watched processing")
      window = @settings.max_active - watched_files.count(&:active?)
      if window > 0
        watched_files.select(&:watched?).take(window).each do |candidate|
          candidate_path = candidate.path
          begin
            candidate.restat
            candidate.activate
          rescue Errno::ENOENT
            common_deleted_reaction(candidate, "Watched")
            next
          rescue => e
            common_error_reaction(candidate_path, e, "Watched")
            next
          end
          break if watch.quit?
        end
      else
        # window exhausted - warn (rate-limited) about files still waiting
        now = Time.now.to_i
        if (now - watch.lastwarn_max_files) > MAX_FILES_WARN_INTERVAL
          waiting = watched_files.size - @settings.max_active
          logger.warn(@settings.max_warn_msg + ", files yet to open: #{waiting}")
          watch.lastwarn_max_files = now
        end
      end
    end

    # Runs the matching handler over every file in the active state.
    def process_active(watched_files)
      logger.debug("Active processing")
      watched_files.select(&:active?).each do |active_file|
        active_path = active_file.path
        begin
          active_file.restat
        rescue Errno::ENOENT
          common_deleted_reaction(active_file, "Active")
          next
        rescue => e
          common_error_reaction(active_path, e, "Active")
          next
        end
        break if watch.quit?

        active_file.compressed? ? read_zip_file(active_file) : read_file(active_file)
        # handlers take care of closing and unwatching
      end
    end

    # file has gone away or we can't read it anymore.
    def common_deleted_reaction(watched_file, action)
      watched_file.unwatch
      deletable_filepaths << watched_file.path
      logger.debug("#{action} - stat failed: #{watched_file.path}, removing from collection")
    end

    # Non-ENOENT failure while stat-ing: log with a truncated backtrace.
    def common_error_reaction(path, error, action)
      logger.error("#{action} - other error #{path}: (#{error.message}, #{error.backtrace.take(8).inspect})")
    end
  end
end end
@@ -0,0 +1,67 @@
1
# encoding: utf-8

module FileWatch
  # Merged option set shared by the watch, discoverer, sincedb collection and
  # the read/tail handlers. Built from defaults, then overlaid with user
  # options via Settings.from_options / #add_options.
  class Settings
    attr_reader :delimiter, :close_older, :ignore_older, :delimiter_byte_size
    attr_reader :max_active, :max_warn_msg, :lastwarn_max_files
    attr_reader :sincedb_write_interval, :stat_interval, :discover_interval
    attr_reader :exclude, :start_new_files_at, :file_chunk_count, :file_chunk_size
    attr_reader :sincedb_path, :sincedb_expiry_duration
    attr_reader :file_sort_by, :file_sort_direction

    # Builds a Settings with defaults overlaid by the given options hash.
    def self.from_options(opts)
      new.add_options(opts)
    end

    # Converts a day count (accepts numeric or numeric-like) to seconds (Float).
    def self.days_to_seconds(days)
      (24 * 3600) * days.to_f
    end

    def initialize
      defaults = {
        :delimiter => "\n",
        :file_chunk_size => FILE_READ_SIZE,
        :max_active => 4095,
        :file_chunk_count => FIXNUM_MAX,
        :sincedb_clean_after => 14,
        :exclude => [],
        :stat_interval => 1,
        :discover_interval => 5,
        :file_sort_by => "last_modified",
        :file_sort_direction => "asc",
      }
      @opts = {}
      @lastwarn_max_files = 0
      add_options(defaults)
    end

    # Merges opts over any previously applied options and refreshes every
    # derived reader. Returns self so calls can be chained.
    # NOTE: the original declared and assigned @sincedb_write_interval twice;
    # the duplicates are removed here (behavior unchanged).
    def add_options(opts)
      @opts.update(opts)
      self.max_open_files = @opts[:max_active]
      @delimiter = @opts[:delimiter]
      @delimiter_byte_size = @delimiter.bytesize
      @file_chunk_size = @opts[:file_chunk_size]
      @close_older = @opts[:close_older]
      @ignore_older = @opts[:ignore_older]
      @sincedb_write_interval = @opts[:sincedb_write_interval]
      @stat_interval = @opts[:stat_interval]
      @discover_interval = @opts[:discover_interval]
      @exclude = Array(@opts[:exclude])
      @start_new_files_at = @opts[:start_new_files_at]
      @file_chunk_count = @opts[:file_chunk_count]
      @sincedb_path = @opts[:sincedb_path]
      @sincedb_expiry_duration = self.class.days_to_seconds(@opts.fetch(:sincedb_clean_after, 14))
      @file_sort_by = @opts[:file_sort_by]
      @file_sort_direction = @opts[:file_sort_direction]
      self
    end

    # Sets the active-file window; nil or non-positive values fall back to
    # the default of 4095 (nil.to_i is 0, so the guard covers both).
    def max_open_files=(value)
      val = value.to_i
      val = 4095 if value.nil? || val <= 0
      @max_warn_msg = "Reached open files limit: #{val}, set by the 'max_open_files' option or default"
      @max_active = val
    end
  end
end
@@ -0,0 +1,215 @@
1
# encoding: utf-8
require "logstash/util/loggable"

module FileWatch
  # this KV collection has a watched_file storage_key (an InodeStruct) as the key
  # and a SincedbValue as the value.
  # the SincedbValues are built by reading the sincedb file.
  class SincedbCollection
    include LogStash::Util::Loggable

    attr_reader :path
    attr_writer :serializer

    def initialize(settings)
      @settings = settings
      @sincedb_last_write = 0
      @sincedb = {}
      @serializer = SincedbRecordSerializer.new(@settings.sincedb_expiry_duration)
      @path = Pathname.new(@settings.sincedb_path)
      # atomic_write renames a temp file into place; that cannot work for
      # char/block devices and is unreliable on Windows, so write in place there.
      @write_method = LogStash::Environment.windows? || @path.chardev? || @path.blockdev? ? method(:non_atomic_write) : method(:atomic_write)
      @full_path = @path.to_path
      FileUtils.touch(@full_path)
    end

    # Flushes to disk only if the configured write interval has elapsed.
    def request_disk_flush
      now = Time.now.to_i
      delta = now - @sincedb_last_write
      if delta >= @settings.sincedb_write_interval
        logger.debug("writing sincedb (delta since last write = #{delta})")
        sincedb_write(now)
      end
    end

    # Unconditional flush, e.g. at shutdown.
    def write(reason=nil)
      logger.debug("caller requested sincedb write (#{reason})")
      sincedb_write
    end

    # Loads the sincedb file, importing only records that have not expired.
    def open
      @time_sdb_opened = Time.now.to_f
      begin
        path.open do |file|
          logger.debug("open: reading from #{path}")
          @serializer.deserialize(file) do |key, value|
            logger.debug("open: importing ... '#{key}' => '#{value}'")
            set_key_value(key, value)
          end
        end
        logger.debug("open: count of keys read: #{@sincedb.keys.size}")
      rescue => e
        #No existing sincedb to load
        logger.debug("open: error: #{path}: #{e.inspect}")
      end
    end

    # Links a discovered watched_file to its sincedb record (matched by inode
    # key), handling renames, stale v1 records and already-associated entries.
    def associate(watched_file)
      logger.debug("associate: finding: #{watched_file.path}")
      sincedb_value = find(watched_file)
      if sincedb_value.nil?
        # sincedb has no record of this inode
        # and due to the window handling of many files
        # this file may not be opened in this session.
        # a new value will be added when the file is opened
        return
      end
      if sincedb_value.watched_file.nil?
        # not associated
        if sincedb_value.path_in_sincedb.nil?
          # old v1 record, assume its the same file
          handle_association(sincedb_value, watched_file)
          return
        end
        if sincedb_value.path_in_sincedb == watched_file.path
          # the path on disk is the same as discovered path
          # and the inode is the same.
          handle_association(sincedb_value, watched_file)
          return
        end
        # the path on disk is different from discovered unassociated path
        # but they have the same key (inode)
        # treat as a new file, a new value will be added when the file is opened
        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
        sincedb_value.clear_watched_file
        delete(watched_file.sincedb_key)
        return
      end
      if sincedb_value.watched_file.equal?(watched_file) # pointer equals
        logger.debug("associate: already associated - #{sincedb_value}, for path: #{watched_file.path}")
        return
      end
      # sincedb_value.watched_file is not the discovered watched_file but they have the same key (inode)
      # this means that the filename was changed during this session.
      # logout the history of the old sincedb_value and remove it
      # a new value will be added when the file is opened
      # TODO notify about done-ness of old sincedb_value and watched_file
      old_watched_file = sincedb_value.watched_file
      sincedb_value.clear_watched_file
      if logger.debug?
        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
        logger.debug("associate: matched but allocated to another - old watched_file history - #{old_watched_file.recent_state_history.join(', ')}")
        logger.debug("associate: matched but allocated to another - DELETING value at key `#{old_watched_file.sincedb_key}`")
      end
      delete(old_watched_file.sincedb_key)
    end

    # Looks up the record for the file's inode key; may return nil.
    def find(watched_file)
      get(watched_file.sincedb_key).tap do |obj|
        logger.debug("find for path: #{watched_file.path}, found: '#{!obj.nil?}'")
      end
    end

    def member?(key)
      @sincedb.member?(key)
    end

    def get(key)
      @sincedb[key]
    end

    def delete(key)
      @sincedb.delete(key)
    end

    def last_read(key)
      @sincedb[key].position
    end

    def rewind(key)
      @sincedb[key].update_position(0)
    end

    def store_last_read(key, last_read)
      @sincedb[key].update_position(last_read)
    end

    def increment(key, amount)
      @sincedb[key].increment_position(amount)
    end

    def set_watched_file(key, watched_file)
      @sincedb[key].set_watched_file(watched_file)
    end

    def unset_watched_file(watched_file)
      return unless member?(watched_file.sincedb_key)
      get(watched_file.sincedb_key).unset_watched_file
    end

    def clear
      @sincedb.clear
    end

    def keys
      @sincedb.keys
    end

    def set(key, value)
      @sincedb[key] = value
      value
    end

    def watched_file_unset?(key)
      return false unless member?(key)
      get(key).watched_file.nil?
    end

    private

    # Copies the persisted position onto the file and ignores it if fully read.
    def handle_association(sincedb_value, watched_file)
      watched_file.update_bytes_read(sincedb_value.position)
      sincedb_value.set_watched_file(watched_file)
      watched_file.initial_completed
      watched_file.ignore if watched_file.all_read?
    end

    # Imports a deserialized record unless it expired before the db was opened.
    def set_key_value(key, value)
      if @time_sdb_opened < value.last_changed_at_expires(@settings.sincedb_expiry_duration)
        logger.debug("open: setting #{key.inspect} to #{value.inspect}")
        set(key, value)
      else
        logger.debug("open: record has expired, skipping: #{key.inspect} #{value.inspect}")
      end
    end

    def sincedb_write(time = Time.now.to_i)
      logger.debug("sincedb_write: to: #{path}")
      begin
        @write_method.call
        @serializer.expired_keys.each do |key|
          @sincedb[key].unset_watched_file
          delete(key)
          logger.debug("sincedb_write: cleaned", "key" => "'#{key}'")
        end
        @sincedb_last_write = time
      rescue Errno::EACCES
        # no file handles free perhaps
        # maybe it will work next time
        logger.debug("sincedb_write: error: #{path}: #{$!}")
      end
    end

    def atomic_write
      FileHelper.write_atomically(@full_path) do |io|
        @serializer.serialize(@sincedb, io)
      end
    end

    def non_atomic_write
      # BUGFIX: IO.open expects an integer file descriptor, not a path string.
      # The original `IO.open(@full_path, 0)` raised TypeError, so the sincedb
      # was never persisted on this code path (Windows / char/block devices).
      # Obtain a descriptor with IO.sysopen first, truncating for rewrite.
      IO.open(IO.sysopen(@full_path, "w+")) do |io|
        @serializer.serialize(@sincedb, io)
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
# encoding: utf-8

module FileWatch
  # Translates between the in-memory sincedb hash and its line-oriented
  # on-disk format, supporting both the old (v1) and new (v2) record layouts.
  class SincedbRecordSerializer

    attr_reader :expired_keys

    # sincedb_value_expiry - seconds after a record's last change before it expires.
    def initialize(sincedb_value_expiry)
      @sincedb_value_expiry = sincedb_value_expiry
      @expired_keys = []
    end

    # Recomputes the expiry window from a day count.
    def update_sincedb_value_expiry_from_days(days)
      @sincedb_value_expiry = Settings.days_to_seconds(days)
    end

    # Writes each live record to io; keys of expired records are collected
    # into #expired_keys for the caller to purge instead of being written.
    def serialize(db, io, as_of = Time.now.to_f)
      @expired_keys.clear
      db.each_pair do |key, value|
        if as_of > value.last_changed_at_expires(@sincedb_value_expiry)
          @expired_keys << key
        else
          io.write(serialize_record(key, value))
        end
      end
    end

    # Yields [key, value] pairs parsed from each line of io.
    def deserialize(io)
      io.each { |line| yield deserialize_record(line) }
    end

    # effectively InodeStruct#to_s SincedbValue#to_s
    def serialize_record(k, v)
      "#{k} #{v}\n"
    end

    # Parses one sincedb line; returns [] for blank input, otherwise
    # [InodeStruct, SincedbValue] in v2 or, failing that, v1 layout.
    def deserialize_record(record)
      return [] if record.nil? || record.empty?
      fields = record.split(" ")
      parse_line_v2(fields) || parse_line_v1(fields)
    end

    private

    # new format e.g. 2977152 1 4 94 1519319662.852678 'path/to/file'
    # do we want to store the last known state of the watched file too?
    def parse_line_v2(fields)
      return false if fields.size < 5
      key = prepare_inode_struct(fields)
      position = fields.shift.to_i
      expires_at = Float(fields.shift) # this is like Time.now.to_f
      sincedb_path = fields.shift
      [key, SincedbValue.new(position, expires_at).add_path_in_sincedb(sincedb_path)]
    end

    # old inode based e.g. 2977152 1 4 94
    def parse_line_v1(fields)
      key = prepare_inode_struct(fields)
      [key, SincedbValue.new(fields.shift.to_i)]
    end

    # Consumes the first three fields: inode string plus two integer device ids.
    def prepare_inode_struct(fields)
      InodeStruct.new(fields.shift, *fields.shift(2).map(&:to_i))
    end
  end
end