logstash-input-file 4.1.3 → 4.1.4

Files changed (48)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/JAR_VERSION +1 -1
  4. data/README.md +0 -3
  5. data/docs/index.asciidoc +26 -16
  6. data/lib/filewatch/bootstrap.rb +10 -21
  7. data/lib/filewatch/discoverer.rb +35 -28
  8. data/lib/filewatch/observing_base.rb +2 -1
  9. data/lib/filewatch/read_mode/handlers/base.rb +19 -6
  10. data/lib/filewatch/read_mode/handlers/read_file.rb +43 -32
  11. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +8 -3
  12. data/lib/filewatch/read_mode/processor.rb +8 -8
  13. data/lib/filewatch/settings.rb +3 -3
  14. data/lib/filewatch/sincedb_collection.rb +56 -42
  15. data/lib/filewatch/sincedb_value.rb +6 -0
  16. data/lib/filewatch/stat/generic.rb +34 -0
  17. data/lib/filewatch/stat/windows_path.rb +32 -0
  18. data/lib/filewatch/tail_mode/handlers/base.rb +40 -22
  19. data/lib/filewatch/tail_mode/handlers/create.rb +1 -2
  20. data/lib/filewatch/tail_mode/handlers/create_initial.rb +2 -1
  21. data/lib/filewatch/tail_mode/handlers/delete.rb +13 -1
  22. data/lib/filewatch/tail_mode/handlers/grow.rb +5 -2
  23. data/lib/filewatch/tail_mode/handlers/shrink.rb +7 -4
  24. data/lib/filewatch/tail_mode/handlers/unignore.rb +4 -2
  25. data/lib/filewatch/tail_mode/processor.rb +147 -58
  26. data/lib/filewatch/watch.rb +15 -35
  27. data/lib/filewatch/watched_file.rb +237 -41
  28. data/lib/filewatch/watched_files_collection.rb +2 -2
  29. data/lib/filewatch/winhelper.rb +167 -25
  30. data/lib/jars/filewatch-1.0.1.jar +0 -0
  31. data/lib/logstash/inputs/file.rb +9 -2
  32. data/logstash-input-file.gemspec +9 -2
  33. data/spec/file_ext/file_ext_windows_spec.rb +36 -0
  34. data/spec/filewatch/read_mode_handlers_read_file_spec.rb +2 -2
  35. data/spec/filewatch/reading_spec.rb +100 -57
  36. data/spec/filewatch/rotate_spec.rb +451 -0
  37. data/spec/filewatch/spec_helper.rb +33 -10
  38. data/spec/filewatch/tailing_spec.rb +273 -153
  39. data/spec/filewatch/watched_file_spec.rb +3 -3
  40. data/spec/filewatch/watched_files_collection_spec.rb +3 -3
  41. data/spec/filewatch/winhelper_spec.rb +4 -5
  42. data/spec/helpers/logging_level_helper.rb +8 -0
  43. data/spec/helpers/rspec_wait_handler_helper.rb +38 -0
  44. data/spec/helpers/spec_helper.rb +7 -1
  45. data/spec/inputs/file_read_spec.rb +54 -24
  46. data/spec/inputs/file_tail_spec.rb +244 -284
  47. metadata +13 -3
  48. data/lib/jars/filewatch-1.0.0.jar +0 -0
data/lib/filewatch/read_mode/processor.rb
@@ -39,16 +39,16 @@ module FileWatch module ReadMode
       @read_zip_file.handle(watched_file)
     end

-    def process_closed(watched_files)
-      # do not process watched_files in the closed state.
+    def process_all_states(watched_files)
+      process_watched(watched_files)
+      return if watch.quit?
+      process_active(watched_files)
     end

-    def process_ignored(watched_files)
-      # do not process watched_files in the ignored state.
-    end
+    private

     def process_watched(watched_files)
-      logger.debug("Watched processing")
+      logger.trace("Watched processing")
       # Handles watched_files in the watched state.
       # for a slice of them:
       # move to the active state
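The read-mode processor's per-state stubs (process_closed, process_ignored) give way to a single process_all_states entry point that walks watched files, then active files, and bails out early on shutdown. A rough sketch of how a polling loop could drive it; the loop itself is illustrative (the real one lives in the watch code), and only process_all_states and quit? come from this diff:

    # illustrative polling loop, not the plugin's actual implementation
    until watch.quit?
      processor.process_all_states(watched_files)   # watched first, then active
      sleep(stat_interval)
    end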
@@ -81,7 +81,7 @@ module FileWatch module ReadMode
     end

     def process_active(watched_files)
-      logger.debug("Active processing")
+      logger.trace("Active processing")
       # Handles watched_files in the active state.
       watched_files.select {|wf| wf.active? }.each do |watched_file|
         path = watched_file.path
@@ -109,7 +109,7 @@ module FileWatch module ReadMode
       # file has gone away or we can't read it anymore.
       watched_file.unwatch
       deletable_filepaths << watched_file.path
-      logger.debug("#{action} - stat failed: #{watched_file.path}, removing from collection")
+      logger.trace("#{action} - stat failed: #{watched_file.path}, removing from collection")
     end

     def common_error_reaction(path, error, action)
data/lib/filewatch/settings.rb
@@ -21,8 +21,8 @@ module FileWatch
       defaults = {
         :delimiter => "\n",
         :file_chunk_size => FILE_READ_SIZE,
-        :max_active => 4095,
-        :file_chunk_count => FIXNUM_MAX,
+        :max_open_files => 4095,
+        :file_chunk_count => MAX_ITERATIONS,
         :sincedb_clean_after => 14,
         :exclude => [],
         :stat_interval => 1,
@@ -37,7 +37,7 @@ module FileWatch

     def add_options(opts)
       @opts.update(opts)
-      self.max_open_files = @opts[:max_active]
+      self.max_open_files = @opts[:max_open_files]
       @delimiter = @opts[:delimiter]
       @delimiter_byte_size = @delimiter.bytesize
       @file_chunk_size = @opts[:file_chunk_size]
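With this release the option key for the open-file cap is :max_open_files (previously :max_active), and the default :file_chunk_count comes from MAX_ITERATIONS instead of FIXNUM_MAX. A minimal sketch of the options hash that add_options now consumes; the values are illustrative, not recommendations:

    opts = {
      :max_open_files   => 4095,        # key renamed from :max_active
      :file_chunk_size  => 32 * 1024,
      :file_chunk_count => 1_000        # illustrative; the shipped default is MAX_ITERATIONS
    }
    # settings.add_options(opts)        # settings being a FileWatch::Settings instance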
data/lib/filewatch/sincedb_collection.rb
@@ -39,7 +39,7 @@ module FileWatch
     end

     def write(reason=nil)
-      logger.debug("caller requested sincedb write (#{reason})")
+      logger.trace("caller requested sincedb write (#{reason})")
       sincedb_write
     end

@@ -47,73 +47,72 @@ module FileWatch
       @time_sdb_opened = Time.now.to_f
       begin
         path.open do |file|
-          logger.debug("open: reading from #{path}")
+          logger.trace("open: reading from #{path}")
           @serializer.deserialize(file) do |key, value|
-            logger.debug("open: importing ... '#{key}' => '#{value}'")
+            logger.trace("open: importing ... '#{key}' => '#{value}'")
             set_key_value(key, value)
           end
         end
-        logger.debug("open: count of keys read: #{@sincedb.keys.size}")
+        logger.trace("open: count of keys read: #{@sincedb.keys.size}")
       rescue => e
         #No existing sincedb to load
-        logger.debug("open: error: #{path}: #{e.inspect}")
+        logger.trace("open: error: #{path}: #{e.inspect}")
       end
     end

     def associate(watched_file)
-      logger.debug("associate: finding: #{watched_file.path}")
+      logger.trace("associate: finding", "inode" => watched_file.sincedb_key.inode, "path" => watched_file.path)
       sincedb_value = find(watched_file)
       if sincedb_value.nil?
         # sincedb has no record of this inode
         # and due to the window handling of many files
         # this file may not be opened in this session.
         # a new value will be added when the file is opened
-        return
+        logger.trace("associate: unmatched")
+        return true
       end
+      logger.trace("associate: found sincedb record", "filename" => watched_file.filename, "sincedb key" => watched_file.sincedb_key,"sincedb_value" => sincedb_value)
       if sincedb_value.watched_file.nil?
         # not associated
         if sincedb_value.path_in_sincedb.nil?
-          # old v1 record, assume its the same file
           handle_association(sincedb_value, watched_file)
-          return
+          logger.trace("associate: inode matched but no path in sincedb")
+          return true
         end
         if sincedb_value.path_in_sincedb == watched_file.path
           # the path on disk is the same as discovered path
           # and the inode is the same.
           handle_association(sincedb_value, watched_file)
-          return
+          logger.trace("associate: inode and path matched")
+          return true
         end
         # the path on disk is different from discovered unassociated path
         # but they have the same key (inode)
         # treat as a new file, a new value will be added when the file is opened
-        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
         sincedb_value.clear_watched_file
         delete(watched_file.sincedb_key)
-        return
+        logger.trace("associate: matched but allocated to another")
+        return true
       end
       if sincedb_value.watched_file.equal?(watched_file) # pointer equals
-        logger.debug("associate: already associated - #{sincedb_value}, for path: #{watched_file.path}")
-        return
+        logger.trace("associate: already associated")
+        return true
       end
-      # sincedb_value.watched_file is not the discovered watched_file but they have the same key (inode)
-      # this means that the filename was changed during this session.
-      # logout the history of the old sincedb_value and remove it
-      # a new value will be added when the file is opened
-      # TODO notify about done-ness of old sincedb_value and watched_file
-      old_watched_file = sincedb_value.watched_file
-      sincedb_value.clear_watched_file
-      if logger.debug?
-        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
-        logger.debug("associate: matched but allocated to another - old watched_file history - #{old_watched_file.recent_state_history.join(', ')}")
-        logger.debug("associate: matched but allocated to another - DELETING value at key `#{old_watched_file.sincedb_key}`")
-      end
-      delete(old_watched_file.sincedb_key)
+      # sincedb_value.watched_file is not this discovered watched_file but they have the same key (inode)
+      # this means that the filename path was changed during this session.
+      # renamed file can be discovered...
+      #   before the original is detected as deleted: state is `active`
+      #   after the original is detected as deleted but before it is actually deleted: state is `delayed_delete`
+      #   after the original is deleted
+      # are not yet in the delete phase, let this play out
+      existing_watched_file = sincedb_value.watched_file
+      logger.trace("----------------- >> associate: the found sincedb_value has a watched_file - this is a rename", "this watched_file details" => watched_file.details, "other watched_file details" => existing_watched_file.details)
+      watched_file.rotation_in_progress
+      true
     end

     def find(watched_file)
-      get(watched_file.sincedb_key).tap do |obj|
-        logger.debug("find for path: #{watched_file.path}, found: '#{!obj.nil?}'")
-      end
+      get(watched_file.sincedb_key)
     end

     def member?(key)
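The rewritten associate no longer deletes the old record when it finds an inode match that is still attached to another watched_file; it flags the new file as a rotation in progress and lets the tail processor resolve it. The filesystem fact behind keying by inode: a rename on the same filesystem keeps the inode, so the existing sincedb key still matches the rotated file. A standalone illustration, not part of the plugin:

    require 'fileutils'
    File.write("app.log", "hello\n")
    before = File.stat("app.log").ino
    FileUtils.mv("app.log", "app.log.1")   # rotation by rename
    after = File.stat("app.log.1").ino
    puts before == after                   # => true on the same filesystem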
@@ -124,6 +123,11 @@ module FileWatch
       @sincedb[key]
     end

+    def set(key, value)
+      @sincedb[key] = value
+      value
+    end
+
     def delete(key)
       @sincedb.delete(key)
     end
@@ -144,11 +148,23 @@ module FileWatch
       @sincedb[key].set_watched_file(watched_file)
     end

-    def unset_watched_file(watched_file)
+    def watched_file_deleted(watched_file)
       return unless member?(watched_file.sincedb_key)
       get(watched_file.sincedb_key).unset_watched_file
     end

+    def store_last_read(key, pos)
+      @sincedb[key].update_position(pos)
+    end
+
+    def clear_watched_file(key)
+      @sincedb[key].clear_watched_file
+    end
+
+    def reading_completed(key)
+      @sincedb[key].reading_completed
+    end
+
     def clear
       @sincedb.clear
     end
@@ -157,11 +173,6 @@ module FileWatch
       @sincedb.keys
     end

-    def set(key, value)
-      @sincedb[key] = value
-      value
-    end
-
     def watched_file_unset?(key)
       return false unless member?(key)
       get(key).watched_file.nil?
@@ -182,33 +193,36 @@ module FileWatch
       watched_file.update_bytes_read(sincedb_value.position)
       sincedb_value.set_watched_file(watched_file)
       watched_file.initial_completed
-      watched_file.ignore if watched_file.all_read?
+      if watched_file.all_read?
+        watched_file.ignore
+        logger.trace("handle_association fully read, ignoring.....", "watched file" => watched_file.details, "sincedb value" => sincedb_value)
+      end
     end

     def set_key_value(key, value)
       if @time_sdb_opened < value.last_changed_at_expires(@settings.sincedb_expiry_duration)
-        logger.debug("open: setting #{key.inspect} to #{value.inspect}")
+        logger.trace("open: setting #{key.inspect} to #{value.inspect}")
         set(key, value)
       else
-        logger.debug("open: record has expired, skipping: #{key.inspect} #{value.inspect}")
+        logger.trace("open: record has expired, skipping: #{key.inspect} #{value.inspect}")
       end
     end

     def sincedb_write(time = Time.now.to_i)
-      logger.debug("sincedb_write: to: #{path}")
+      logger.trace("sincedb_write: to: #{path}")
       begin
         @write_method.call
         @serializer.expired_keys.each do |key|
           @sincedb[key].unset_watched_file
           delete(key)
-          logger.debug("sincedb_write: cleaned", "key" => "'#{key}'")
+          logger.trace("sincedb_write: cleaned", "key" => "'#{key}'")
         end
         @sincedb_last_write = time
         @write_requested = false
       rescue Errno::EACCES
         # no file handles free perhaps
         # maybe it will work next time
-        logger.debug("sincedb_write: error: #{path}: #{$!}")
+        logger.trace("sincedb_write: error: #{path}: #{$!}")
       end
     end

data/lib/filewatch/sincedb_value.rb
@@ -66,6 +66,12 @@ module FileWatch
       @watched_file = nil
     end

+    def reading_completed
+      touch
+      @path_in_sincedb = @watched_file.path
+      @position = @watched_file.bytes_read
+    end
+
     def unset_watched_file
       # called in read mode only because we flushed any remaining bytes as a final line.
       # cache the position
data/lib/filewatch/stat/generic.rb (new file)
@@ -0,0 +1,34 @@
+# encoding: utf-8
+
+module FileWatch module Stat
+  class Generic
+
+    attr_reader :identifier, :inode, :modified_at, :size, :inode_struct
+
+    def initialize(source)
+      @source = source
+      @identifier = nil
+      restat
+    end
+
+    def add_identifier(identifier) self; end
+
+    def restat
+      @inner_stat = @source.stat
+      @inode = @inner_stat.ino.to_s
+      @modified_at = @inner_stat.mtime.to_f
+      @size = @inner_stat.size
+      @dev_major = @inner_stat.dev_major
+      @dev_minor = @inner_stat.dev_minor
+      @inode_struct = InodeStruct.new(@inode, @dev_major, @dev_minor)
+    end
+
+    def windows?
+      false
+    end
+
+    def inspect
+      "<Generic size='#{@size}', modified_at='#{@modified_at}', inode='#{@inode}', inode_struct='#{@inode_struct}'>"
+    end
+  end
+end end
data/lib/filewatch/stat/windows_path.rb (new file)
@@ -0,0 +1,32 @@
+# encoding: utf-8
+
+module FileWatch module Stat
+  class WindowsPath
+
+    attr_reader :identifier, :inode, :modified_at, :size, :inode_struct
+
+    def initialize(source)
+      @source = source
+      @inode = Winhelper.identifier_from_path(@source.to_path)
+      @dev_major = 0
+      @dev_minor = 0
+      # in windows the dev hi and low are in the identifier
+      @inode_struct = InodeStruct.new(@inode, @dev_major, @dev_minor)
+      restat
+    end
+
+    def restat
+      @inner_stat = @source.stat
+      @modified_at = @inner_stat.mtime.to_f
+      @size = @inner_stat.size
+    end
+
+    def windows?
+      true
+    end
+
+    def inspect
+      "<WindowsPath size='#{@size}', modified_at='#{@modified_at}', inode='#{@inode}', inode_struct='#{@inode_struct}'>"
+    end
+  end
+end end
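Both new stat wrappers expose the same readers (inode, modified_at, size, inode_struct) plus restat and windows?, so callers can pick one per platform and use them interchangeably. A minimal sketch of such a selection, assuming the plugin's classes are loaded; the stat_for helper name is hypothetical, not part of the plugin:

    # hypothetical helper: choose the platform-appropriate wrapper for an open File
    def stat_for(file)
      Gem.win_platform? ? FileWatch::Stat::WindowsPath.new(file) : FileWatch::Stat::Generic.new(file)
    end

    stat = stat_for(File.open("app.log"))
    stat.restat                          # refresh size and mtime from the filesystem
    puts stat.size, stat.inode_struct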
data/lib/filewatch/tail_mode/handlers/base.rb
@@ -13,7 +13,7 @@ module FileWatch module TailMode module Handlers
     end

     def handle(watched_file)
-      logger.debug("handling: #{watched_file.path}")
+      logger.trace("handling: #{watched_file.filename}")
       unless watched_file.has_listener?
         watched_file.set_listener(@observer)
       end
@@ -30,8 +30,9 @@ module FileWatch module TailMode module Handlers

     private

-    def read_to_eof(watched_file)
+    def controlled_read(watched_file, loop_control)
       changed = false
+      logger.trace("reading...", "iterations" => loop_control.count, "amount" => loop_control.size, "filename" => watched_file.filename)
       # from a real config (has 102 file inputs)
       # -- This cfg creates a file input for every log file to create a dedicated file pointer and read all file simultaneously
       # -- If we put all log files in one file input glob we will have indexing delay, because Logstash waits until the first file becomes EOF
@@ -39,20 +40,16 @@ module FileWatch module TailMode module Handlers
       # we enable the pseudo parallel processing of each file.
       # user also has the option to specify a low `stat_interval` and a very high `discover_interval` to respond
       # quicker to changing files and not allowing too much content to build up before reading it.
-      @settings.file_chunk_count.times do
+      loop_control.count.times do
         begin
-          data = watched_file.file_read(@settings.file_chunk_size)
-          result = watched_file.buffer_extract(data) # expect BufferExtractResult
-          logger.info(result.warning, result.additional) unless result.warning.empty?
+          result = watched_file.read_extract_lines(loop_control.size) # expect BufferExtractResult
+          logger.trace(result.warning, result.additional) unless result.warning.empty?
           changed = true
           result.lines.each do |line|
             watched_file.listener.accept(line)
             # sincedb position is now independent from the watched_file bytes_read
             sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
           end
-          # instead of tracking the bytes_read line by line we need to track by the data read size.
-          # because we seek to the bytes_read not the sincedb position
-          watched_file.increment_bytes_read(data.bytesize)
         rescue EOFError
           # it only makes sense to signal EOF in "read" mode not "tail"
           break
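read_to_eof is replaced by controlled_read, which takes the iteration count and read size from a loop-control object rather than straight from settings. All the method needs from that object is count and size, so a plain Struct can stand in when reasoning about or testing it; the stub below is hypothetical, not the plugin's real loop-control class:

    # hypothetical stub exposing the two readers controlled_read relies on
    LoopControlStub = Struct.new(:count, :size)
    control = LoopControlStub.new(16, 32 * 1024)    # 16 reads of 32 KiB each
    # handler.send(:controlled_read, watched_file, control)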
@@ -70,7 +67,7 @@ module FileWatch module TailMode module Handlers

     def open_file(watched_file)
       return true if watched_file.file_open?
-      logger.debug("opening #{watched_file.path}")
+      logger.trace("opening #{watched_file.filename}")
       begin
         watched_file.open
       rescue
@@ -82,43 +79,64 @@ module FileWatch module TailMode module Handlers
           logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}")
           watched_file.last_open_warning_at = now
         else
-          logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
+          logger.trace("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
         end
         watched_file.watch # set it back to watch so we can try it again
-      end
-      if watched_file.file_open?
-        watched_file.listener.opened
-        true
       else
-        false
+        watched_file.listener.opened
       end
+      watched_file.file_open?
     end

     def add_or_update_sincedb_collection(watched_file)
       sincedb_value = @sincedb_collection.find(watched_file)
       if sincedb_value.nil?
-        add_new_value_sincedb_collection(watched_file)
+        sincedb_value = add_new_value_sincedb_collection(watched_file)
+        watched_file.initial_completed
       elsif sincedb_value.watched_file == watched_file
         update_existing_sincedb_collection_value(watched_file, sincedb_value)
+        watched_file.initial_completed
       else
-        logger.warn? && logger.warn("mismatch on sincedb_value.watched_file, this should have been handled by Discoverer")
+        msg = "add_or_update_sincedb_collection: found sincedb record"
+        logger.trace(msg,
+          "sincedb key" => watched_file.sincedb_key,
+          "sincedb value" => sincedb_value
+        )
+        # detected a rotation, Discoverer can't handle this because this watched file is not a new discovery.
+        # we must handle it here, by transferring state and have the sincedb value track this watched file
+        # rotate_as_file and rotate_from will switch the sincedb key to the inode that the path is now pointing to
+        # and pickup the sincedb_value from before.
+        msg = "add_or_update_sincedb_collection: the found sincedb_value has a watched_file - this is a rename, switching inode to this watched file"
+        logger.trace(msg)
+        existing_watched_file = sincedb_value.watched_file
+        if existing_watched_file.nil?
+          sincedb_value.set_watched_file(watched_file)
+          logger.trace("add_or_update_sincedb_collection: switching as new file")
+          watched_file.rotate_as_file
+          watched_file.update_bytes_read(sincedb_value.position)
+        else
+          sincedb_value.set_watched_file(watched_file)
+          logger.trace("add_or_update_sincedb_collection: switching from...", "watched_file details" => watched_file.details)
+          watched_file.rotate_from(existing_watched_file)
+        end
       end
-      watched_file.initial_completed
+      sincedb_value
     end

     def update_existing_sincedb_collection_value(watched_file, sincedb_value)
-      logger.debug("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}")
+      logger.trace("update_existing_sincedb_collection_value: #{watched_file.filename}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}")
       update_existing_specifically(watched_file, sincedb_value)
     end

     def add_new_value_sincedb_collection(watched_file)
       sincedb_value = get_new_value_specifically(watched_file)
-      logger.debug("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position)
+      logger.trace("add_new_value_sincedb_collection", "position" => sincedb_value.position, "watched_file details" => watched_file.details)
       sincedb_collection.set(watched_file.sincedb_key, sincedb_value)
+      sincedb_value
     end

     def get_new_value_specifically(watched_file)
-      position = @settings.start_new_files_at == :beginning ? 0 : watched_file.last_stat_size
+      position = watched_file.position_for_new_sincedb_value
       value = SincedbValue.new(position)
       value.set_watched_file(watched_file)