logstash-input-file 4.1.3 → 4.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/JAR_VERSION +1 -1
  4. data/README.md +0 -3
  5. data/docs/index.asciidoc +26 -16
  6. data/lib/filewatch/bootstrap.rb +10 -21
  7. data/lib/filewatch/discoverer.rb +35 -28
  8. data/lib/filewatch/observing_base.rb +2 -1
  9. data/lib/filewatch/read_mode/handlers/base.rb +19 -6
  10. data/lib/filewatch/read_mode/handlers/read_file.rb +43 -32
  11. data/lib/filewatch/read_mode/handlers/read_zip_file.rb +8 -3
  12. data/lib/filewatch/read_mode/processor.rb +8 -8
  13. data/lib/filewatch/settings.rb +3 -3
  14. data/lib/filewatch/sincedb_collection.rb +56 -42
  15. data/lib/filewatch/sincedb_value.rb +6 -0
  16. data/lib/filewatch/stat/generic.rb +34 -0
  17. data/lib/filewatch/stat/windows_path.rb +32 -0
  18. data/lib/filewatch/tail_mode/handlers/base.rb +40 -22
  19. data/lib/filewatch/tail_mode/handlers/create.rb +1 -2
  20. data/lib/filewatch/tail_mode/handlers/create_initial.rb +2 -1
  21. data/lib/filewatch/tail_mode/handlers/delete.rb +13 -1
  22. data/lib/filewatch/tail_mode/handlers/grow.rb +5 -2
  23. data/lib/filewatch/tail_mode/handlers/shrink.rb +7 -4
  24. data/lib/filewatch/tail_mode/handlers/unignore.rb +4 -2
  25. data/lib/filewatch/tail_mode/processor.rb +147 -58
  26. data/lib/filewatch/watch.rb +15 -35
  27. data/lib/filewatch/watched_file.rb +237 -41
  28. data/lib/filewatch/watched_files_collection.rb +2 -2
  29. data/lib/filewatch/winhelper.rb +167 -25
  30. data/lib/jars/filewatch-1.0.1.jar +0 -0
  31. data/lib/logstash/inputs/file.rb +9 -2
  32. data/logstash-input-file.gemspec +9 -2
  33. data/spec/file_ext/file_ext_windows_spec.rb +36 -0
  34. data/spec/filewatch/read_mode_handlers_read_file_spec.rb +2 -2
  35. data/spec/filewatch/reading_spec.rb +100 -57
  36. data/spec/filewatch/rotate_spec.rb +451 -0
  37. data/spec/filewatch/spec_helper.rb +33 -10
  38. data/spec/filewatch/tailing_spec.rb +273 -153
  39. data/spec/filewatch/watched_file_spec.rb +3 -3
  40. data/spec/filewatch/watched_files_collection_spec.rb +3 -3
  41. data/spec/filewatch/winhelper_spec.rb +4 -5
  42. data/spec/helpers/logging_level_helper.rb +8 -0
  43. data/spec/helpers/rspec_wait_handler_helper.rb +38 -0
  44. data/spec/helpers/spec_helper.rb +7 -1
  45. data/spec/inputs/file_read_spec.rb +54 -24
  46. data/spec/inputs/file_tail_spec.rb +244 -284
  47. metadata +13 -3
  48. data/lib/jars/filewatch-1.0.0.jar +0 -0
@@ -39,16 +39,16 @@ module FileWatch module ReadMode
       @read_zip_file.handle(watched_file)
     end
 
-    def process_closed(watched_files)
-      # do not process watched_files in the closed state.
+    def process_all_states(watched_files)
+      process_watched(watched_files)
+      return if watch.quit?
+      process_active(watched_files)
     end
 
-    def process_ignored(watched_files)
-      # do not process watched_files in the ignored state.
-    end
+    private
 
     def process_watched(watched_files)
-      logger.debug("Watched processing")
+      logger.trace("Watched processing")
       # Handles watched_files in the watched state.
       # for a slice of them:
       #   move to the active state
@@ -81,7 +81,7 @@ module FileWatch module ReadMode
     end
 
     def process_active(watched_files)
-      logger.debug("Active processing")
+      logger.trace("Active processing")
       # Handles watched_files in the active state.
       watched_files.select {|wf| wf.active? }.each do |watched_file|
         path = watched_file.path
@@ -109,7 +109,7 @@ module FileWatch module ReadMode
       # file has gone away or we can't read it anymore.
       watched_file.unwatch
       deletable_filepaths << watched_file.path
-      logger.debug("#{action} - stat failed: #{watched_file.path}, removing from collection")
+      logger.trace("#{action} - stat failed: #{watched_file.path}, removing from collection")
     end
 
     def common_error_reaction(path, error, action)
@@ -21,8 +21,8 @@ module FileWatch
       defaults = {
         :delimiter => "\n",
         :file_chunk_size => FILE_READ_SIZE,
-        :max_active => 4095,
-        :file_chunk_count => FIXNUM_MAX,
+        :max_open_files => 4095,
+        :file_chunk_count => MAX_ITERATIONS,
         :sincedb_clean_after => 14,
         :exclude => [],
         :stat_interval => 1,
@@ -37,7 +37,7 @@ module FileWatch
 
     def add_options(opts)
       @opts.update(opts)
-      self.max_open_files = @opts[:max_active]
+      self.max_open_files = @opts[:max_open_files]
       @delimiter = @opts[:delimiter]
       @delimiter_byte_size = @delimiter.bytesize
       @file_chunk_size = @opts[:file_chunk_size]
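Purely as an illustration of the option rename above (not part of the diff): a minimal Ruby sketch of passing the new keys to FileWatch::Settings, assuming the gem's lib directory is on the load path and that Settings.new takes no required arguments; only add_options is shown in the hunk.

    require "filewatch/settings"

    # Hypothetical usage sketch: the :max_active key is now :max_open_files,
    # and :file_chunk_count defaults to MAX_ITERATIONS instead of FIXNUM_MAX.
    settings = FileWatch::Settings.new
    settings.add_options(
      :max_open_files   => 4095,   # cap on simultaneously open file handles
      :file_chunk_size  => 32768,  # bytes read per chunk
      :file_chunk_count => 4       # chunks read per file before moving on
    )
    settings.max_open_files  # => 4095, set via the self.max_open_files= writer above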
@@ -39,7 +39,7 @@ module FileWatch
     end
 
     def write(reason=nil)
-      logger.debug("caller requested sincedb write (#{reason})")
+      logger.trace("caller requested sincedb write (#{reason})")
       sincedb_write
     end
 
@@ -47,73 +47,72 @@ module FileWatch
       @time_sdb_opened = Time.now.to_f
       begin
         path.open do |file|
-          logger.debug("open: reading from #{path}")
+          logger.trace("open: reading from #{path}")
           @serializer.deserialize(file) do |key, value|
-            logger.debug("open: importing ... '#{key}' => '#{value}'")
+            logger.trace("open: importing ... '#{key}' => '#{value}'")
             set_key_value(key, value)
           end
         end
-        logger.debug("open: count of keys read: #{@sincedb.keys.size}")
+        logger.trace("open: count of keys read: #{@sincedb.keys.size}")
       rescue => e
         #No existing sincedb to load
-        logger.debug("open: error: #{path}: #{e.inspect}")
+        logger.trace("open: error: #{path}: #{e.inspect}")
       end
     end
 
     def associate(watched_file)
-      logger.debug("associate: finding: #{watched_file.path}")
+      logger.trace("associate: finding", "inode" => watched_file.sincedb_key.inode, "path" => watched_file.path)
       sincedb_value = find(watched_file)
       if sincedb_value.nil?
         # sincedb has no record of this inode
         # and due to the window handling of many files
         # this file may not be opened in this session.
         # a new value will be added when the file is opened
-        return
+        logger.trace("associate: unmatched")
+        return true
       end
+      logger.trace("associate: found sincedb record", "filename" => watched_file.filename, "sincedb key" => watched_file.sincedb_key,"sincedb_value" => sincedb_value)
       if sincedb_value.watched_file.nil?
         # not associated
         if sincedb_value.path_in_sincedb.nil?
-          # old v1 record, assume its the same file
           handle_association(sincedb_value, watched_file)
-          return
+          logger.trace("associate: inode matched but no path in sincedb")
+          return true
         end
         if sincedb_value.path_in_sincedb == watched_file.path
           # the path on disk is the same as discovered path
           # and the inode is the same.
           handle_association(sincedb_value, watched_file)
-          return
+          logger.trace("associate: inode and path matched")
+          return true
         end
         # the path on disk is different from discovered unassociated path
         # but they have the same key (inode)
        # treat as a new file, a new value will be added when the file is opened
-        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
         sincedb_value.clear_watched_file
         delete(watched_file.sincedb_key)
-        return
+        logger.trace("associate: matched but allocated to another")
+        return true
       end
       if sincedb_value.watched_file.equal?(watched_file) # pointer equals
-        logger.debug("associate: already associated - #{sincedb_value}, for path: #{watched_file.path}")
-        return
+        logger.trace("associate: already associated")
+        return true
      end
-      # sincedb_value.watched_file is not the discovered watched_file but they have the same key (inode)
-      # this means that the filename was changed during this session.
-      # logout the history of the old sincedb_value and remove it
-      # a new value will be added when the file is opened
-      # TODO notify about done-ness of old sincedb_value and watched_file
-      old_watched_file = sincedb_value.watched_file
-      sincedb_value.clear_watched_file
-      if logger.debug?
-        logger.debug("associate: matched but allocated to another - #{sincedb_value}")
-        logger.debug("associate: matched but allocated to another - old watched_file history - #{old_watched_file.recent_state_history.join(', ')}")
-        logger.debug("associate: matched but allocated to another - DELETING value at key `#{old_watched_file.sincedb_key}`")
-      end
-      delete(old_watched_file.sincedb_key)
+      # sincedb_value.watched_file is not this discovered watched_file but they have the same key (inode)
+      # this means that the filename path was changed during this session.
+      # renamed file can be discovered...
+      #   before the original is detected as deleted: state is `active`
+      #   after the original is detected as deleted but before it is actually deleted: state is `delayed_delete`
+      #   after the original is deleted
+      # are not yet in the delete phase, let this play out
+      existing_watched_file = sincedb_value.watched_file
+      logger.trace("----------------- >> associate: the found sincedb_value has a watched_file - this is a rename", "this watched_file details" => watched_file.details, "other watched_file details" => existing_watched_file.details)
+      watched_file.rotation_in_progress
+      true
     end
 
     def find(watched_file)
-      get(watched_file.sincedb_key).tap do |obj|
-        logger.debug("find for path: #{watched_file.path}, found: '#{!obj.nil?}'")
-      end
+      get(watched_file.sincedb_key)
     end
 
     def member?(key)
@@ -124,6 +123,11 @@ module FileWatch
       @sincedb[key]
     end
 
+    def set(key, value)
+      @sincedb[key] = value
+      value
+    end
+
     def delete(key)
       @sincedb.delete(key)
     end
@@ -144,11 +148,23 @@ module FileWatch
       @sincedb[key].set_watched_file(watched_file)
     end
 
-    def unset_watched_file(watched_file)
+    def watched_file_deleted(watched_file)
       return unless member?(watched_file.sincedb_key)
       get(watched_file.sincedb_key).unset_watched_file
     end
 
+    def store_last_read(key, pos)
+      @sincedb[key].update_position(pos)
+    end
+
+    def clear_watched_file(key)
+      @sincedb[key].clear_watched_file
+    end
+
+    def reading_completed(key)
+      @sincedb[key].reading_completed
+    end
+
     def clear
       @sincedb.clear
     end
@@ -157,11 +173,6 @@ module FileWatch
       @sincedb.keys
     end
 
-    def set(key, value)
-      @sincedb[key] = value
-      value
-    end
-
     def watched_file_unset?(key)
       return false unless member?(key)
       get(key).watched_file.nil?
@@ -182,33 +193,36 @@ module FileWatch
       watched_file.update_bytes_read(sincedb_value.position)
       sincedb_value.set_watched_file(watched_file)
       watched_file.initial_completed
-      watched_file.ignore if watched_file.all_read?
+      if watched_file.all_read?
+        watched_file.ignore
+        logger.trace("handle_association fully read, ignoring.....", "watched file" => watched_file.details, "sincedb value" => sincedb_value)
+      end
     end
 
     def set_key_value(key, value)
       if @time_sdb_opened < value.last_changed_at_expires(@settings.sincedb_expiry_duration)
-        logger.debug("open: setting #{key.inspect} to #{value.inspect}")
+        logger.trace("open: setting #{key.inspect} to #{value.inspect}")
         set(key, value)
       else
-        logger.debug("open: record has expired, skipping: #{key.inspect} #{value.inspect}")
+        logger.trace("open: record has expired, skipping: #{key.inspect} #{value.inspect}")
       end
     end
 
     def sincedb_write(time = Time.now.to_i)
-      logger.debug("sincedb_write: to: #{path}")
+      logger.trace("sincedb_write: to: #{path}")
       begin
         @write_method.call
         @serializer.expired_keys.each do |key|
           @sincedb[key].unset_watched_file
           delete(key)
-          logger.debug("sincedb_write: cleaned", "key" => "'#{key}'")
+          logger.trace("sincedb_write: cleaned", "key" => "'#{key}'")
         end
         @sincedb_last_write = time
         @write_requested = false
       rescue Errno::EACCES
         # no file handles free perhaps
         # maybe it will work next time
-        logger.debug("sincedb_write: error: #{path}: #{$!}")
+        logger.trace("sincedb_write: error: #{path}: #{$!}")
       end
     end
 
@@ -66,6 +66,12 @@ module FileWatch
       @watched_file = nil
     end
 
+    def reading_completed
+      touch
+      @path_in_sincedb = @watched_file.path
+      @position = @watched_file.bytes_read
+    end
+
     def unset_watched_file
       # called in read mode only because we flushed any remaining bytes as a final line.
       # cache the position
@@ -0,0 +1,34 @@
+# encoding: utf-8
+
+module FileWatch module Stat
+  class Generic
+
+    attr_reader :identifier, :inode, :modified_at, :size, :inode_struct
+
+    def initialize(source)
+      @source = source
+      @identifier = nil
+      restat
+    end
+
+    def add_identifier(identifier) self; end
+
+    def restat
+      @inner_stat = @source.stat
+      @inode = @inner_stat.ino.to_s
+      @modified_at = @inner_stat.mtime.to_f
+      @size = @inner_stat.size
+      @dev_major = @inner_stat.dev_major
+      @dev_minor = @inner_stat.dev_minor
+      @inode_struct = InodeStruct.new(@inode, @dev_major, @dev_minor)
+    end
+
+    def windows?
+      false
+    end
+
+    def inspect
+      "<Generic size='#{@size}', modified_at='#{@modified_at}', inode='#{@inode}', inode_struct='#{@inode_struct}'>"
+    end
+  end
+end end
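A brief, hypothetical usage sketch of the new non-Windows stat wrapper (illustration only, not part of the diff; it assumes any object responding to #stat, such as an open File, is an acceptable source):

    file = File.open("/var/log/syslog")
    stat = FileWatch::Stat::Generic.new(file)   # restat runs on construction
    stat.size           # current size from the wrapped File::Stat
    stat.modified_at    # mtime as a Float
    stat.inode_struct   # InodeStruct built from inode, dev_major and dev_minor
    stat.windows?       # => false
    stat.restat         # refresh size/mtime/inode after the file changes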
@@ -0,0 +1,32 @@
+# encoding: utf-8
+
+module FileWatch module Stat
+  class WindowsPath
+
+    attr_reader :identifier, :inode, :modified_at, :size, :inode_struct
+
+    def initialize(source)
+      @source = source
+      @inode = Winhelper.identifier_from_path(@source.to_path)
+      @dev_major = 0
+      @dev_minor = 0
+      # in windows the dev hi and low are in the identifier
+      @inode_struct = InodeStruct.new(@inode, @dev_major, @dev_minor)
+      restat
+    end
+
+    def restat
+      @inner_stat = @source.stat
+      @modified_at = @inner_stat.mtime.to_f
+      @size = @inner_stat.size
+    end
+
+    def windows?
+      true
+    end
+
+    def inspect
+      "<WindowsPath size='#{@size}', modified_at='#{@modified_at}', inode='#{@inode}', inode_struct='#{@inode_struct}'>"
+    end
+  end
+end end
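This diff does not show how a caller picks between the two wrappers; a hypothetical selection sketch, using Ruby's standard platform check rather than whatever detection the library actually uses:

    # Illustration only; the real wiring lives elsewhere in the library (e.g. watched_file.rb).
    stat_class = Gem.win_platform? ? FileWatch::Stat::WindowsPath : FileWatch::Stat::Generic
    stat = stat_class.new(File.open(path))  # WindowsPath also calls #to_path on the source
    sincedb_key = stat.inode_struct         # same key shape on both platforms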
@@ -13,7 +13,7 @@ module FileWatch module TailMode module Handlers
     end
 
     def handle(watched_file)
-      logger.debug("handling: #{watched_file.path}")
+      logger.trace("handling: #{watched_file.filename}")
       unless watched_file.has_listener?
         watched_file.set_listener(@observer)
       end
@@ -30,8 +30,9 @@ module FileWatch module TailMode module Handlers
 
     private
 
-    def read_to_eof(watched_file)
+    def controlled_read(watched_file, loop_control)
       changed = false
+      logger.trace("reading...", "iterations" => loop_control.count, "amount" => loop_control.size, "filename" => watched_file.filename)
       # from a real config (has 102 file inputs)
       # -- This cfg creates a file input for every log file to create a dedicated file pointer and read all file simultaneously
       # -- If we put all log files in one file input glob we will have indexing delay, because Logstash waits until the first file becomes EOF
@@ -39,20 +40,16 @@ module FileWatch module TailMode module Handlers
       # we enable the pseudo parallel processing of each file.
       # user also has the option to specify a low `stat_interval` and a very high `discover_interval` to respond
       # quicker to changing files and not allowing too much content to build up before reading it.
-      @settings.file_chunk_count.times do
+      loop_control.count.times do
         begin
-          data = watched_file.file_read(@settings.file_chunk_size)
-          result = watched_file.buffer_extract(data) # expect BufferExtractResult
-          logger.info(result.warning, result.additional) unless result.warning.empty?
+          result = watched_file.read_extract_lines(loop_control.size) # expect BufferExtractResult
+          logger.trace(result.warning, result.additional) unless result.warning.empty?
           changed = true
           result.lines.each do |line|
            watched_file.listener.accept(line)
            # sincedb position is now independent from the watched_file bytes_read
            sincedb_collection.increment(watched_file.sincedb_key, line.bytesize + @settings.delimiter_byte_size)
          end
-          # instead of tracking the bytes_read line by line we need to track by the data read size.
-          # because we seek to the bytes_read not the sincedb position
-          watched_file.increment_bytes_read(data.bytesize)
        rescue EOFError
          # it only makes sense to signal EOF in "read" mode not "tail"
          break
@@ -70,7 +67,7 @@ module FileWatch module TailMode module Handlers
 
     def open_file(watched_file)
       return true if watched_file.file_open?
-      logger.debug("opening #{watched_file.path}")
+      logger.trace("opening #{watched_file.filename}")
       begin
         watched_file.open
       rescue
@@ -82,43 +79,64 @@ module FileWatch module TailMode module Handlers
           logger.warn("failed to open #{watched_file.path}: #{$!.inspect}, #{$!.backtrace.take(3)}")
           watched_file.last_open_warning_at = now
         else
-          logger.debug("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
+          logger.trace("suppressed warning for `failed to open` #{watched_file.path}: #{$!.inspect}")
         end
         watched_file.watch # set it back to watch so we can try it again
-      end
-      if watched_file.file_open?
-        watched_file.listener.opened
-        true
       else
-        false
+        watched_file.listener.opened
       end
+      watched_file.file_open?
     end
 
     def add_or_update_sincedb_collection(watched_file)
       sincedb_value = @sincedb_collection.find(watched_file)
       if sincedb_value.nil?
-        add_new_value_sincedb_collection(watched_file)
+        sincedb_value = add_new_value_sincedb_collection(watched_file)
+        watched_file.initial_completed
       elsif sincedb_value.watched_file == watched_file
         update_existing_sincedb_collection_value(watched_file, sincedb_value)
+        watched_file.initial_completed
       else
-        logger.warn? && logger.warn("mismatch on sincedb_value.watched_file, this should have been handled by Discoverer")
+        msg = "add_or_update_sincedb_collection: found sincedb record"
+        logger.trace(msg,
+          "sincedb key" => watched_file.sincedb_key,
+          "sincedb value" => sincedb_value
+        )
+        # detected a rotation, Discoverer can't handle this because this watched file is not a new discovery.
+        # we must handle it here, by transferring state and have the sincedb value track this watched file
+        # rotate_as_file and rotate_from will switch the sincedb key to the inode that the path is now pointing to
+        # and pickup the sincedb_value from before.
+        msg = "add_or_update_sincedb_collection: the found sincedb_value has a watched_file - this is a rename, switching inode to this watched file"
+        logger.trace(msg)
+        existing_watched_file = sincedb_value.watched_file
+        if existing_watched_file.nil?
+          sincedb_value.set_watched_file(watched_file)
+          logger.trace("add_or_update_sincedb_collection: switching as new file")
+          watched_file.rotate_as_file
+          watched_file.update_bytes_read(sincedb_value.position)
+        else
+          sincedb_value.set_watched_file(watched_file)
+          logger.trace("add_or_update_sincedb_collection: switching from...", "watched_file details" => watched_file.details)
+          watched_file.rotate_from(existing_watched_file)
+        end
       end
-      watched_file.initial_completed
+      sincedb_value
     end
 
     def update_existing_sincedb_collection_value(watched_file, sincedb_value)
-      logger.debug("update_existing_sincedb_collection_value: #{watched_file.path}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}")
+      logger.trace("update_existing_sincedb_collection_value: #{watched_file.filename}, last value #{sincedb_value.position}, cur size #{watched_file.last_stat_size}")
       update_existing_specifically(watched_file, sincedb_value)
     end
 
     def add_new_value_sincedb_collection(watched_file)
       sincedb_value = get_new_value_specifically(watched_file)
-      logger.debug("add_new_value_sincedb_collection: #{watched_file.path}", "position" => sincedb_value.position)
+      logger.trace("add_new_value_sincedb_collection", "position" => sincedb_value.position, "watched_file details" => watched_file.details)
       sincedb_collection.set(watched_file.sincedb_key, sincedb_value)
+      sincedb_value
     end
 
     def get_new_value_specifically(watched_file)
-      position = @settings.start_new_files_at == :beginning ? 0 : watched_file.last_stat_size
+      position = watched_file.position_for_new_sincedb_value
       value = SincedbValue.new(position)
       value.set_watched_file(watched_file)
       watched_file.update_bytes_read(position)