filewatch 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 846e8322c5eaff65f07c8fc29a38252fc012848b
4
- data.tar.gz: 2d6f2d06f3fce3016decdeb4d7f19c2fc033d63b
3
+ metadata.gz: 59ac6ba9efca68da9ea0b3342e87deab3635f53c
4
+ data.tar.gz: 5a71f0673de75e1444584b01c0dcfe56e039d2ce
5
5
  SHA512:
6
- metadata.gz: a6ddbce50ce399c9201f60c54790bf310afb0658f577ef6ae0de16f552f9c4c2aab70106eb34f06f5d8eadbcc492743bd03232030e2e39363b75bcc9988c4c1e
7
- data.tar.gz: eec3de133bc8998d9f956655a07c63725b0c69a111a93355740b1e615b7615ed3c980dc048de7927e3138d455420b6b636fbfbc04e124c26a05eb7f41d974f18
6
+ metadata.gz: 366003c080654805fa4da59a6ef354a25cb0a8495090d668ad0dd8ae628e7bf7438b5d0894938925bed8d8e2b08a6dae3671a0772480d94d210039273aaa7e01
7
+ data.tar.gz: d05f46ef57830653fbd19c3eef0d4e49fd4137f5604d0296df1991fc64ae0b6b74c926a7e2796bf528d4c55d1571f8a9b190115b88571110027bb16ebe6fe8fb
@@ -21,43 +21,45 @@ module FileWatch
21
21
 
22
22
  def subscribe(observer = NullObserver.new)
23
23
  @watch.subscribe(@opts[:stat_interval],
24
- @opts[:discover_interval]) do |event, path|
24
+ @opts[:discover_interval]) do |event, watched_file|
25
+ path = watched_file.path
26
+ file_is_open = watched_file.file_open?
25
27
  listener = observer.listener_for(path)
26
28
  case event
29
+ when :unignore
30
+ listener.created
31
+ _add_to_sincedb(watched_file, event) unless @sincedb.member?(watched_file.inode)
27
32
  when :create, :create_initial
28
- if @files.member?(path)
29
- @logger.debug? && @logger.debug("#{event} for #{path}: already exists in @files")
33
+ if file_is_open
34
+ @logger.debug? && @logger.debug("#{event} for #{path}: file already open")
30
35
  next
31
36
  end
32
- if _open_file(path, event)
37
+ if _open_file(watched_file, event)
33
38
  listener.created
34
- observe_read_file(path, listener)
39
+ observe_read_file(watched_file, listener)
35
40
  end
36
41
  when :modify
37
- if !@files.member?(path)
38
- @logger.debug? && @logger.debug(":modify for #{path}, does not exist in @files")
39
- if _open_file(path, event)
40
- observe_read_file(path, listener)
41
- end
42
+ if file_is_open
43
+ observe_read_file(watched_file, listener)
42
44
  else
43
- observe_read_file(path, listener)
45
+ @logger.debug? && @logger.debug(":modify for #{path}, file is not open, opening now")
46
+ if _open_file(watched_file, event)
47
+ observe_read_file(watched_file, listener)
48
+ end
44
49
  end
45
50
  when :delete
46
- @logger.debug? && @logger.debug(":delete for #{path}, deleted from @files")
47
- if @files[path]
48
- observe_read_file(path, listener)
49
- @files[path].close
51
+ if file_is_open
52
+ @logger.debug? && @logger.debug(":delete for #{path}, closing file")
53
+ observe_read_file(watched_file, listener)
54
+ watched_file.file_close
55
+ else
56
+ @logger.debug? && @logger.debug(":delete for #{path}, file already closed")
50
57
  end
51
58
  listener.deleted
52
- @files.delete(path)
53
- @statcache.delete(path)
54
59
  when :timeout
55
- @logger.debug? && @logger.debug(":timeout for #{path}, deleted from @files")
56
- if (deleted = @files.delete(path))
57
- deleted.close
58
- end
60
+ @logger.debug? && @logger.debug(":timeout for #{path}, closing file")
61
+ watched_file.file_close
59
62
  listener.timed_out
60
- @statcache.delete(path)
61
63
  else
62
64
  @logger.warn("unknown event type #{event} for #{path}")
63
65
  end
@@ -65,24 +67,29 @@ module FileWatch
65
67
  end # def subscribe
66
68
 
67
69
  private
68
- def observe_read_file(path, listener)
69
- @buffers[path] ||= FileWatch::BufferedTokenizer.new(@opts[:delimiter])
70
- delimiter_byte_size = @opts[:delimiter].bytesize
70
+ def observe_read_file(watched_file, listener)
71
71
  changed = false
72
72
  loop do
73
73
  begin
74
- data = @files[path].sysread(32768)
74
+ data = watched_file.file_read(32768)
75
75
  changed = true
76
- @buffers[path].extract(data).each do |line|
76
+ watched_file.buffer_extract(data).each do |line|
77
77
  listener.accept(line)
78
- @sincedb[@statcache[path]] += (line.bytesize + delimiter_byte_size)
78
+ @sincedb[watched_file.inode] += (line.bytesize + @delimiter_byte_size)
79
79
  end
80
+ # watched_file bytes_read tracks the sincedb entry
81
+ # see TODO in watch.rb
82
+ watched_file.update_bytes_read(@sincedb[watched_file.inode])
80
83
  rescue EOFError
81
84
  listener.eof
82
85
  break
83
86
  rescue Errno::EWOULDBLOCK, Errno::EINTR
84
87
  listener.error
85
88
  break
89
+ rescue => e
90
+ @logger.debug? && @logger.debug("observe_read_file: general error reading #{watched_file.path} - error: #{e.inspect}")
91
+ listener.error
92
+ break
86
93
  end
87
94
  end
88
95
 
@@ -1,5 +1,4 @@
1
1
  require "filewatch/helper"
2
- require "filewatch/buftok"
3
2
  require "filewatch/watch"
4
3
 
5
4
  if RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/
@@ -14,14 +13,15 @@ require "JRubyFileExtension.jar" if defined? JRUBY_VERSION
14
13
  module FileWatch
15
14
  module TailBase
16
15
  # how often (in seconds) we @logger.warn a failed file open, per path.
17
- OPEN_WARN_INTERVAL = ENV["FILEWATCH_OPEN_WARN_INTERVAL"] ?
18
- ENV["FILEWATCH_OPEN_WARN_INTERVAL"].to_i : 300
16
+ OPEN_WARN_INTERVAL = ENV.fetch("FILEWATCH_OPEN_WARN_INTERVAL", 300).to_i
19
17
 
20
18
  attr_reader :logger
21
19
 
22
20
  class NoSinceDBPathGiven < StandardError; end
23
21
 
24
22
  public
23
+ # TODO move sincedb to watch.rb
24
+ # see TODO there
25
25
  def initialize(opts={})
26
26
  @iswindows = ((RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/) != nil)
27
27
 
@@ -31,14 +31,12 @@ module FileWatch
31
31
  @logger = Logger.new(STDERR)
32
32
  @logger.level = Logger::INFO
33
33
  end
34
- @files = {}
35
34
  @lastwarn = Hash.new { |h, k| h[k] = 0 }
36
35
  @buffers = {}
37
36
  @watch = FileWatch::Watch.new
38
37
  @watch.logger = @logger
39
- @sincedb = {}
40
38
  @sincedb_last_write = 0
41
- @statcache = {}
39
+ @sincedb = {}
42
40
  @opts = {
43
41
  :sincedb_write_interval => 10,
44
42
  :stat_interval => 1,
@@ -57,6 +55,9 @@ module FileWatch
57
55
  @watch.exclude(@opts[:exclude])
58
56
  @watch.close_older = @opts[:close_older]
59
57
  @watch.ignore_older = @opts[:ignore_older]
58
+ @watch.delimiter = @opts[:delimiter]
59
+ @watch.max_open_files = @opts[:max_open_files]
60
+ @delimiter_byte_size = @opts[:delimiter].bytesize
60
61
 
61
62
  _sincedb_open
62
63
  end # def initialize
@@ -74,28 +75,20 @@ module FileWatch
74
75
 
75
76
  public
76
77
  def sincedb_record_uid(path, stat)
77
- inode = @watch.inode(path,stat)
78
- @statcache[path] = inode
79
- return inode
78
+ # retain this call because it's part of the public API
79
+ @watch.inode(path, stat)
80
80
  end # def sincedb_record_uid
81
81
 
82
82
  private
83
83
 
84
- def file_expired?(stat)
85
- return false if @opts[:ignore_older].nil?
86
- # (Time.now - stat.mtime) <- in jruby, this does int and float
87
- # conversions before the subtraction and returns a float.
88
- # so use all ints instead
89
- (Time.now.to_i - stat.mtime.to_i) > @opts[:ignore_older]
90
- end
91
-
92
- def _open_file(path, event)
84
+ def _open_file(watched_file, event)
85
+ path = watched_file.path
93
86
  @logger.debug? && @logger.debug("_open_file: #{path}: opening")
94
87
  begin
95
88
  if @iswindows && defined? JRUBY_VERSION
96
- @files[path] = Java::RubyFileExt::getRubyFile(path)
89
+ watched_file.file_add_opened(Java::RubyFileExt::getRubyFile(path))
97
90
  else
98
- @files[path] = File.open(path)
91
+ watched_file.file_add_opened(File.open(path))
99
92
  end
100
93
  rescue
101
94
  # don't emit this message too often. if a file that we can't
@@ -106,46 +99,68 @@ module FileWatch
106
99
  @logger.warn("failed to open #{path}: #{$!}")
107
100
  @lastwarn[path] = now
108
101
  else
109
- @logger.debug? && @logger.debug("(warn supressed) failed to open #{path}: #{$!}")
102
+ @logger.debug? && @logger.debug("(warn supressed) failed to open #{path}: #{$!.inspect}")
110
103
  end
111
- @files.delete(path)
104
+ watched_file.watch # set it back to watch so we can try it again
112
105
  return false
113
106
  end
107
+ _add_to_sincedb(watched_file, event)
108
+ true
109
+ end # def _open_file
114
110
 
115
- stat = File::Stat.new(path)
116
- sincedb_record_uid = sincedb_record_uid(path, stat)
117
-
118
- expired_based_size = file_expired?(stat) ? stat.size : 0
119
-
120
- if @sincedb.member?(sincedb_record_uid)
121
- last_size = @sincedb[sincedb_record_uid]
122
- @logger.debug? && @logger.debug("#{path}: sincedb last value #{@sincedb[sincedb_record_uid]}, cur size #{stat.size}")
123
- if last_size <= stat.size
124
- @logger.debug? && @logger.debug("#{path}: sincedb: seeking to #{last_size}")
125
- @files[path].sysseek(last_size, IO::SEEK_SET)
111
+ def _add_to_sincedb(watched_file, event)
112
+ # called when newly discovered files are opened
113
+ stat = watched_file.filestat
114
+ sincedb_key = watched_file.inode
115
+ path = watched_file.path
116
+
117
+ if @sincedb.member?(sincedb_key)
118
+ # we have seen this inode before
119
+ # but this is a new watched_file
120
+ # and we can't tell if its contents are the same
121
+ # as another file we have watched before.
122
+ last_read_size = @sincedb[sincedb_key]
123
+ @logger.debug? && @logger.debug("#{path}: sincedb last value #{@sincedb[sincedb_key]}, cur size #{stat.size}")
124
+ if stat.size > last_read_size
125
+ # 1) it could really be a new file with lots of new content
126
+ # 2) it could have old content that was read plus new that is not
127
+ @logger.debug? && @logger.debug("#{path}: sincedb: seeking to #{last_read_size}")
128
+ watched_file.file_seek(last_read_size) # going with 2
129
+ watched_file.update_bytes_read(last_read_size)
130
+ elsif stat.size == last_read_size
131
+ # 1) it could have old content that was read
132
+ # 2) it could have new content that happens to be the same size
133
+ @logger.debug? && @logger.debug("#{path}: sincedb: seeking to #{last_read_size}")
134
+ watched_file.file_seek(last_read_size) # going with 1.
135
+ watched_file.update_bytes_read(last_read_size)
126
136
  else
137
+ # it seems to be a new file with less content
127
138
  @logger.debug? && @logger.debug("#{path}: last value size is greater than current value, starting over")
128
- @sincedb[sincedb_record_uid] = 0
139
+ @sincedb[sincedb_key] = 0
140
+ watched_file.update_bytes_read(0) if watched_file.bytes_read != 0
129
141
  end
130
- elsif event == :create_initial && @files[path]
142
+ elsif event == :create_initial
131
143
  if @opts[:start_new_files_at] == :beginning
132
144
  @logger.debug? && @logger.debug("#{path}: initial create, no sincedb, seeking to beginning of file")
133
- @files[path].sysseek(expired_based_size, IO::SEEK_SET)
134
- @sincedb[sincedb_record_uid] = expired_based_size
145
+ watched_file.file_seek(0)
146
+ @sincedb[sincedb_key] = 0
135
147
  else
136
148
  # seek to end
137
149
  @logger.debug? && @logger.debug("#{path}: initial create, no sincedb, seeking to end #{stat.size}")
138
- @files[path].sysseek(stat.size, IO::SEEK_SET)
139
- @sincedb[sincedb_record_uid] = stat.size
150
+ watched_file.file_seek(stat.size)
151
+ @sincedb[sincedb_key] = stat.size
140
152
  end
141
- elsif event == :create && @files[path]
142
- @sincedb[sincedb_record_uid] = expired_based_size
153
+ elsif event == :create
154
+ @sincedb[sincedb_key] = 0
155
+ elsif event == :modify && @sincedb[sincedb_key].nil?
156
+ @sincedb[sincedb_key] = 0
157
+ elsif event == :unignore
158
+ @sincedb[sincedb_key] = watched_file.bytes_read
143
159
  else
144
160
  @logger.debug? && @logger.debug("#{path}: staying at position 0, no sincedb")
145
161
  end
146
-
147
162
  return true
148
- end # def _open_file
163
+ end # def _add_to_sincedb
149
164
 
150
165
  public
151
166
  def sincedb_write(reason=nil)
@@ -157,30 +172,34 @@ module FileWatch
157
172
  def _sincedb_open
158
173
  path = @opts[:sincedb_path]
159
174
  begin
160
- db = File.open(path)
175
+ File.open(path) do |db|
176
+ @logger.debug? && @logger.debug("_sincedb_open: reading from #{path}")
177
+ db.each do |line|
178
+ ino, dev_major, dev_minor, pos = line.split(" ", 4)
179
+ sincedb_key = [ino, dev_major.to_i, dev_minor.to_i]
180
+ @logger.debug? && @logger.debug("_sincedb_open: setting #{sincedb_key.inspect} to #{pos.to_i}")
181
+ @sincedb[sincedb_key] = pos.to_i
182
+ end
183
+ end
161
184
  rescue
162
185
  #No existing sincedb to load
163
- @logger.debug? && @logger.debug("_sincedb_open: #{path}: #{$!}")
164
- return
165
- end
166
-
167
- @logger.debug? && @logger.debug("_sincedb_open: reading from #{path}")
168
- db.each do |line|
169
- ino, dev_major, dev_minor, pos = line.split(" ", 4)
170
- sincedb_record_uid = [ino, dev_major.to_i, dev_minor.to_i]
171
- @logger.debug? && @logger.debug("_sincedb_open: setting #{sincedb_record_uid.inspect} to #{pos.to_i}")
172
- @sincedb[sincedb_record_uid] = pos.to_i
186
+ @logger.debug? && @logger.debug("_sincedb_open: error: #{path}: #{$!}")
173
187
  end
174
- db.close
175
188
  end # def _sincedb_open
176
189
 
177
190
  private
178
191
  def _sincedb_write
179
192
  path = @opts[:sincedb_path]
180
- if @iswindows || File.device?(path)
181
- IO.write(path, serialize_sincedb, 0)
182
- else
183
- File.atomic_write(path) {|file| file.write(serialize_sincedb) }
193
+ begin
194
+ if @iswindows || File.device?(path)
195
+ IO.write(path, serialize_sincedb, 0)
196
+ else
197
+ File.atomic_write(path) {|file| file.write(serialize_sincedb) }
198
+ end
199
+ rescue Errno::EACCES
200
+ # probably no file handles free
201
+ # maybe it will work next time
202
+ @logger.debug? && @logger.debug("_sincedb_write: error: #{path}: #{$!}")
184
203
  end
185
204
  end # def _sincedb_write
186
205
 
@@ -189,13 +208,13 @@ module FileWatch
189
208
  # it should be called for clean up
190
209
  # before the instance is disposed of.
191
210
  def quit
211
+ @watch.quit # <-- should close all the files
212
+ # and that should allow the sincedb_write to succeed if it could not before
192
213
  _sincedb_write
193
- @watch.quit
194
- @files.each {|path, file| file.close }
195
- @files.clear
196
214
  end # def quit
197
215
 
198
216
  public
217
+
199
218
  # close_file(path) is to be used by external code
200
219
  # when it knows that it is completely done with a file.
201
220
  # Other files or folders may still be being watched.
@@ -204,10 +223,7 @@ module FileWatch
204
223
  # The sysadmin should rename, move or delete the file.
205
224
  def close_file(path)
206
225
  @watch.unwatch(path)
207
- file = @files.delete(path)
208
- return if file.nil?
209
226
  _sincedb_write
210
- file.close
211
227
  end
212
228
 
213
229
  private
@@ -1,59 +1,42 @@
1
1
  require "logger"
2
+ require_relative 'watched_file'
3
+
2
4
  if RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/
3
5
  require "filewatch/winhelper"
6
+ FILEWATCH_INODE_METHOD = :win_inode
7
+ else
8
+ FILEWATCH_INODE_METHOD = :nix_inode
4
9
  end
5
10
 
6
11
  module FileWatch
12
+ # TODO make a WatchedFilesDb class that holds the watched_files instead of a hash
13
+ # it should support an 'identity' of path + inode
14
+ # it should be serializable instead of the sincedb
15
+ # it should be deserializable to recreate the exact state all files were in as last seen
16
+ # some parts of the each method should be handled by it, e.g.
17
+ # wfs_db.<state>_iterator{|wf| }, trapping the Errno::ENOENT, auto_delete and yield watched_file
7
18
  class Watch
8
- class WatchedFile
9
- def self.new_initial(path, inode)
10
- new(path, inode, true)
11
- end
12
-
13
- def self.new_ongoing(path, inode)
14
- new(path, inode, false)
15
- end
16
-
17
- attr_reader :size, :inode
18
- attr_writer :create_sent, :initial, :timeout_sent
19
-
20
- attr_reader :path
21
-
22
- def initialize(path, inode, initial)
23
- @path = path
24
- @size, @create_sent, @timeout_sent = 0, false, false
25
- @inode, @initial = inode, initial
26
- end
27
-
28
- def update(stat, inode = nil)
29
- @size = stat.size
30
- @inode = inode if inode
31
- end
32
-
33
- def clear_timeout
34
- @timeout_sent = false
35
- end
36
19
 
37
- def create_sent?
38
- @create_sent
39
- end
20
+ MAX_FILES_WARN_INTERVAL = ENV.fetch("FILEWATCH_MAX_FILES_WARN_INTERVAL", 20).to_i
40
21
 
41
- def initial?
42
- @initial
43
- end
22
+ def self.win_inode(path, stat)
23
+ fileId = Winhelper.GetWindowsUniqueFileIdentifier(path)
24
+ [fileId, 0, 0] # dev_* doesn't make sense on Windows
25
+ end
44
26
 
45
- def timeout_sent?
46
- @timeout_sent
47
- end
27
+ def self.nix_inode(path, stat)
28
+ [stat.ino.to_s, stat.dev_major, stat.dev_minor]
29
+ end
48
30
 
49
- def to_s() inspect; end
31
+ def self.inode(path, stat)
32
+ send(FILEWATCH_INODE_METHOD, path, stat)
50
33
  end
51
34
 
52
- attr_accessor :logger, :close_older, :ignore_older
35
+ attr_accessor :logger
36
+ attr_accessor :delimiter
37
+ attr_reader :max_active
53
38
 
54
- public
55
39
  def initialize(opts={})
56
- @iswindows = ((RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/) != nil)
57
40
  if opts[:logger]
58
41
  @logger = opts[:logger]
59
42
  else
@@ -62,62 +45,65 @@ module FileWatch
62
45
  end
63
46
  @watching = []
64
47
  @exclude = []
65
- @files = Hash.new { |h, k| h[k] = WatchedFile.new(k, false, false) }
66
- @unwatched = Hash.new
48
+ @files = Hash.new
67
49
  # we need to be threadsafe about the mutation
68
50
  # of the above 2 ivars because the public
69
- # methods each, discover, watch and unwatch
51
+ # methods each, discover and watch
70
52
  # can be called from different threads.
71
53
  @lock = Mutex.new
72
54
  # we need to be threadsafe about the quit mutation
73
55
  @quit = false
74
56
  @quit_lock = Mutex.new
57
+ self.max_open_files = ENV["FILEWATCH_MAX_OPEN_FILES"].to_i
58
+ @lastwarn_max_files = 0
75
59
  end # def initialize
76
60
 
77
61
  public
62
+
63
+ def max_open_files=(value)
64
+ val = value.to_i
65
+ val = 4095 if value.nil? || val <= 0
66
+ @max_warn_msg = "Reached open files limit: #{val}, set by the 'max_open_files' option or default"
67
+ @max_active = val
68
+ end
69
+
70
+ def ignore_older=(value)
71
+ #nil is allowed but 0 and negatives are made nil
72
+ if !value.nil?
73
+ val = value.to_f
74
+ val = val <= 0 ? nil : val
75
+ end
76
+ @ignore_older = val
77
+ end
78
+
79
+ def close_older=(value)
80
+ if !value.nil?
81
+ val = value.to_f
82
+ val = val <= 0 ? nil : val
83
+ end
84
+ @close_older = val
85
+ end
86
+
78
87
  def exclude(path)
79
88
  path.to_a.each { |p| @exclude << p }
80
89
  end
81
90
 
82
- public
83
91
  def watch(path)
84
92
  synchronized do
85
93
  if !@watching.member?(path)
86
94
  @watching << path
87
95
  _discover_file(path) do |filepath, stat|
88
- WatchedFile.new_initial(filepath, inode(filepath, stat))
96
+ WatchedFile.new_initial(
97
+ filepath, inode(filepath, stat), stat
98
+ ).init_vars(@delimiter, @ignore_older, @close_older)
89
99
  end
90
100
  end
91
101
  end
92
102
  return true
93
103
  end # def watch
94
104
 
95
- def unwatch(path)
96
- synchronized do
97
- result = false
98
- if @watching.delete(path)
99
- _globbed_files(path).each do |file|
100
- deleted = @files.delete(file)
101
- @unwatched[file] = deleted if deleted
102
- end
103
- result = true
104
- else
105
- result = @files.delete(path)
106
- @unwatched[path] = result if result
107
- end
108
- return !!result
109
- end
110
- end
111
-
112
- public
113
- def inode(path,stat)
114
- if @iswindows
115
- fileId = Winhelper.GetWindowsUniqueFileIdentifier(path)
116
- inode = [fileId, 0, 0] # dev_* doesn't make sense on Windows
117
- else
118
- inode = [stat.ino.to_s, stat.dev_major, stat.dev_minor]
119
- end
120
- return inode
105
+ def inode(path, stat)
106
+ self.class.inode(path, stat)
121
107
  end
122
108
 
123
109
  # Calls &block with params [event_type, watched_file]
@@ -126,77 +112,147 @@ module FileWatch
126
112
  # :create - file is created (new file after initial globs, start at 0)
127
113
  # :modify - file is modified (size increases)
128
114
  # :delete - file is deleted
129
- public
115
+ # :timeout - file is closable
116
+ # :unignore - file was ignored, but since then it received new content
130
117
  def each(&block)
131
118
  synchronized do
119
+ return if @files.empty?
120
+
121
+ file_deleteable = []
122
+ # creates this array just once
123
+ watched_files = @files.values
124
+
125
+ # look at the closed files to see if any have changed
126
+ watched_files.select {|wf| wf.closed? }.each do |watched_file|
127
+ path = watched_file.path
128
+ begin
129
+ stat = watched_file.restat
130
+ if watched_file.size_changed? || watched_file.inode_changed?(inode(path,stat))
131
+ # if the closed file changed, move it to the watched state
132
+ # not to active state because we want to use MAX_OPEN_FILES throttling.
133
+ watched_file.watch
134
+ end
135
+ rescue Errno::ENOENT
136
+ # file has gone away or we can't read it anymore.
137
+ file_deleteable << path
138
+ @logger.debug? && @logger.debug("each: closed: stat failed: #{path}: (#{$!}), deleting from @files")
139
+ rescue => e
140
+ @logger.debug? && @logger.debug("each: closed: stat failed: #{path}: (#{e.inspect})")
141
+ end
142
+ end
143
+
144
+ # look at the ignored files to see if any have changed
145
+ watched_files.select {|wf| wf.ignored? }.each do |watched_file|
146
+ path = watched_file.path
147
+ begin
148
+ stat = watched_file.restat
149
+ if watched_file.size_changed? || watched_file.inode_changed?(inode(path,stat))
150
+ # if the ignored file changed, move it to the watched state
151
+ # not to active state because we want to use MAX_OPEN_FILES throttling.
152
+ # this file has not been yielded to the block yet
153
+ # but we must have the tail to start from the end, so when the file
154
+ # was first ignored we updated the bytes_read to the stat.size at that time.
155
+ # by adding this to the sincedb so that the subsequent modify
156
+ # event can detect the change
157
+ watched_file.watch
158
+ yield(:unignore, watched_file)
159
+ end
160
+ rescue Errno::ENOENT
161
+ # file has gone away or we can't read it anymore.
162
+ file_deleteable << path
163
+ @logger.debug? && @logger.debug("each: ignored: stat failed: #{path}: (#{$!}), deleting from @files")
164
+ rescue => e
165
+ @logger.debug? && @logger.debug("each: ignored: stat failed: #{path}: (#{e.inspect})")
166
+ end
167
+ end
168
+
132
169
  # Send any creates.
133
- @files.each do |path, watched_file|
134
- if !watched_file.create_sent?
170
+ if (to_take = @max_active - watched_files.count{|wf| wf.active?}) > 0
171
+ watched_files.select {|wf| wf.watched? }.take(to_take).each do |watched_file|
172
+ watched_file.activate
173
+ # don't do create again
174
+ next if watched_file.state_history_any?(:closed, :ignored)
175
+ # if the file can't be opened during the yield
176
+ # its state is set back to watched
135
177
  if watched_file.initial?
136
- yield(:create_initial, path)
178
+ yield(:create_initial, watched_file)
179
+ else
180
+ yield(:create, watched_file)
181
+ end
182
+ end
183
+ else
184
+ now = Time.now.to_i
185
+ if (now - @lastwarn_max_files) > MAX_FILES_WARN_INTERVAL
186
+ waiting = @files.size - @max_active
187
+ specific = if @close_older.nil?
188
+ ", try setting close_older. There are #{waiting} unopened files"
137
189
  else
138
- yield(:create, path)
190
+ ", files yet to open: #{waiting}"
139
191
  end
140
- watched_file.create_sent = true
192
+ @logger.warn(@max_warn_msg + specific)
193
+ @lastwarn_max_files = now
141
194
  end
142
195
  end
143
196
 
144
- @files.each do |path, watched_file|
197
+ # wf.active means the actual files were opened
198
+ # and have been read once - unless they were empty at the time
199
+ watched_files.select {|wf| wf.active? }.each do |watched_file|
200
+ path = watched_file.path
145
201
  begin
146
- stat = File::Stat.new(path)
202
+ stat = watched_file.restat
147
203
  rescue Errno::ENOENT
148
204
  # file has gone away or we can't read it anymore.
149
- @files.delete(path)
150
- @logger.debug? && @logger.debug("#{path}: stat failed (#{$!}), deleting from @files")
151
- yield(:delete, path)
205
+ file_deleteable << path
206
+ @logger.debug? && @logger.debug("each: active: stat failed: #{path}: (#{$!}), deleting from @files")
207
+ watched_file.unwatch
208
+ yield(:delete, watched_file)
209
+ next
210
+ rescue => e
211
+ @logger.debug? && @logger.debug("each: active: stat failed: #{path}: (#{e.inspect})")
152
212
  next
153
213
  end
154
214
 
155
- if file_closable?(stat, watched_file)
156
- if !watched_file.timeout_sent?
157
- @logger.debug? && @logger.debug("#{path}: file expired")
158
- yield(:timeout, path)
159
- watched_file.timeout_sent = true
160
- end
215
+ if watched_file.file_closable?
216
+ @logger.debug? && @logger.debug("each: active: file expired: #{path}")
217
+ yield(:timeout, watched_file)
218
+ watched_file.close
161
219
  next
162
220
  end
163
221
 
164
- inode = inode(path,stat)
165
- old_size = watched_file.size
166
-
167
- if inode != watched_file.inode
168
- @logger.debug? && @logger.debug("#{path}: old inode was #{watched_file.inode.inspect}, new is #{inode.inspect}")
169
- yield(:delete, path)
170
- yield(:create, path)
171
- watched_file.update(stat, inode)
172
- elsif stat.size < old_size
173
- @logger.debug? && @logger.debug("#{path}: file rolled, new size is #{stat.size}, old size #{old_size}")
174
- yield(:delete, path)
175
- yield(:create, path)
176
- watched_file.update(stat, inode)
177
- elsif stat.size > old_size
178
- @logger.debug? && @logger.debug("#{path}: file grew, old size #{old_size}, new size #{stat.size}")
179
- yield(:modify, path)
180
- # if there is a material change to the file, re-enable timeout
181
- watched_file.clear_timeout
182
- watched_file.update(stat, inode)
222
+ _inode = inode(path,stat)
223
+ read_thus_far = watched_file.bytes_read
224
+ # we don't update the size here, it's updated when we actually read
225
+ if watched_file.inode_changed?(_inode)
226
+ @logger.debug? && @logger.debug("each: new inode: #{path}: old inode was #{watched_file.inode.inspect}, new is #{_inode.inspect}")
227
+ watched_file.update_inode(_inode)
228
+ yield(:delete, watched_file)
229
+ yield(:create, watched_file)
230
+ elsif stat.size < read_thus_far
231
+ @logger.debug? && @logger.debug("each: file rolled: #{path}: new size is #{stat.size}, old size #{read_thus_far}")
232
+ yield(:delete, watched_file)
233
+ yield(:create, watched_file)
234
+ elsif stat.size > read_thus_far
235
+ @logger.debug? && @logger.debug("each: file grew: #{path}: old size #{read_thus_far}, new size #{stat.size}")
236
+ yield(:modify, watched_file)
183
237
  end
184
238
  end
239
+
240
+ file_deleteable.each {|f| @files.delete(f)}
185
241
  end
186
242
  end # def each
187
243
 
188
- public
189
244
  def discover
190
245
  synchronized do
191
246
  @watching.each do |path|
192
247
  _discover_file(path) do |filepath, stat|
193
- WatchedFile.new_ongoing(filepath, inode(filepath, stat))
248
+ WatchedFile.new_ongoing(
249
+ filepath, inode(filepath, stat), stat
250
+ ).init_vars(@delimiter, @ignore_older, @close_older)
194
251
  end
195
252
  end
196
253
  end
197
254
  end
198
255
 
199
- public
200
256
  def subscribe(stat_interval = 1, discover_interval = 5, &block)
201
257
  glob = 0
202
258
  reset_quit
@@ -208,80 +264,62 @@ module FileWatch
208
264
  discover
209
265
  glob = 0
210
266
  end
211
-
267
+ break if quit?
212
268
  sleep(stat_interval)
213
269
  end
270
+ @files.values.each(&:file_close)
214
271
  end # def subscribe
215
272
 
216
- private
217
- def file_closable?(stat, watched_file)
218
- file_can_close?(stat) && watched_file.size == stat.size
219
- end
220
-
221
- def file_ignorable?(stat)
222
- return false unless expiry_ignore_enabled?
223
- # (Time.now - stat.mtime) <- in jruby, this does int and float
224
- # conversions before the subtraction and returns a float.
225
- # so use all ints instead
226
- (Time.now.to_i - stat.mtime.to_i) > @ignore_older
227
- end
273
+ def quit
274
+ @quit_lock.synchronize { @quit = true }
275
+ end # def quit
228
276
 
229
- def file_can_close?(stat)
230
- return false unless expiry_close_enabled?
231
- # (Time.now - stat.mtime) <- in jruby, this does int and float
232
- # conversions before the subtraction and returns a float.
233
- # so use all ints instead
234
- (Time.now.to_i - stat.mtime.to_i) > @close_older
277
+ def quit?
278
+ @quit_lock.synchronize { @quit }
235
279
  end
236
280
 
237
281
  private
282
+
238
283
  def _discover_file(path)
239
284
  _globbed_files(path).each do |file|
240
- next if @files.member?(file)
241
- next if @unwatched.member?(file)
242
285
  next unless File.file?(file)
243
-
244
- @logger.debug? && @logger.debug("_discover_file: #{path}: new: #{file} (exclude is #{@exclude.inspect})")
286
+ new_discovery = false
287
+ if @files.member?(file)
288
+ watched_file = @files[file]
289
+ else
290
+ @logger.debug? && @logger.debug("_discover_file: #{path}: new: #{file} (exclude is #{@exclude.inspect})")
291
+ # let the caller build the object in its context
292
+ new_discovery = true
293
+ watched_file = yield(file, File::Stat.new(file))
294
+ end
245
295
 
246
296
  skip = false
247
297
  @exclude.each do |pattern|
248
298
  if File.fnmatch?(pattern, File.basename(file))
249
299
  @logger.debug? && @logger.debug("_discover_file: #{file}: skipping because it " +
250
- "matches exclude #{pattern}")
300
+ "matches exclude #{pattern}") if new_discovery
251
301
  skip = true
302
+ watched_file.unwatch
252
303
  break
253
304
  end
254
305
  end
255
306
  next if skip
256
307
 
257
- stat = File::Stat.new(file)
258
- # let the caller build the object in its context
259
- watched_file = yield(file, stat)
260
-
261
- if file_ignorable?(stat)
262
- msg = "_discover_file: #{file}: skipping because it was last modified more than #{@ignore_older} seconds ago"
263
- @logger.debug? && @logger.debug(msg)
264
- # we update the size on discovery here
265
- # so the existing contents are not read.
266
- # because, normally, a newly discovered file will
267
- # have a watched_file size of zero
268
- watched_file.update(stat)
308
+ if new_discovery
309
+ if watched_file.file_ignorable?
310
+ @logger.debug? && @logger.debug("_discover_file: #{file}: skipping because it was last modified more than #{@ignore_older} seconds ago")
311
+ # on discovery we put watched_file into the ignored state and that
312
+ # updates the size from the internal stat
313
+ # so the existing contents are not read.
314
+ # because, normally, a newly discovered file will
315
+ # have a watched_file size of zero
316
+ watched_file.ignore
317
+ end
318
+ @files[file] = watched_file
269
319
  end
270
-
271
- @files[file] = watched_file
272
320
  end
273
321
  end # def _discover_file
274
322
 
275
- private
276
- def expiry_close_enabled?
277
- !@close_older.nil?
278
- end
279
-
280
- private
281
- def expiry_ignore_enabled?
282
- !@ignore_older.nil?
283
- end
284
-
285
323
  private
286
324
  def _globbed_files(path)
287
325
  globbed_dirs = Dir.glob(path)
@@ -299,19 +337,11 @@ module FileWatch
299
337
  @lock.synchronize { block.call }
300
338
  end
301
339
 
302
- private
303
- def quit?
304
- @quit_lock.synchronize { @quit }
305
- end
340
+ public
306
341
 
307
342
  private
308
343
  def reset_quit
309
344
  @quit_lock.synchronize { @quit = false }
310
345
  end
311
-
312
- public
313
- def quit
314
- @quit_lock.synchronize { @quit = true }
315
- end # def quit
316
346
  end # class Watch
317
347
  end # module FileWatch
@@ -0,0 +1,173 @@
1
+ require "filewatch/buftok"
2
+
3
+ module FileWatch
4
+ class WatchedFile
5
+ def self.new_initial(path, inode, stat)
6
+ new(path, inode, stat, true)
7
+ end
8
+
9
+ def self.new_ongoing(path, inode, stat)
10
+ new(path, inode, stat, false)
11
+ end
12
+
13
+ attr_reader :bytes_read, :inode, :state, :file, :buffer, :state_history
14
+ attr_reader :path, :filestat, :accessed_at
15
+ attr_accessor :close_older, :ignore_older, :delimiter
16
+
17
+ def delimiter
18
+ @delimiter
19
+ end
20
+
21
+ def initialize(path, inode, stat, initial)
22
+ @path = path
23
+ @bytes_read = 0
24
+ @inode = inode
25
+ @initial = initial
26
+ @state_history = []
27
+ @state = :watched
28
+ @filestat = stat
29
+ set_accessed_at
30
+ end
31
+
32
+ def init_vars(delim, ignore_o, close_o)
33
+ @delimiter = delim
34
+ @ignore_older = ignore_o
35
+ @close_older = close_o
36
+ self
37
+ end
38
+
39
+ def set_accessed_at
40
+ @accessed_at = Time.now.to_f
41
+ end
42
+
43
+ def initial?
44
+ @initial
45
+ end
46
+
47
+ def size_changed?
48
+ filestat.size != bytes_read
49
+ end
50
+
51
+ def inode_changed?(value)
52
+ self.inode != value
53
+ end
54
+
55
+ def file_add_opened(rubyfile)
56
+ @file = rubyfile
57
+ @buffer = FileWatch::BufferedTokenizer.new(delimiter || "\n")
58
+ end
59
+
60
+ def file_close
61
+ return if @file.nil? || @file.closed?
62
+ @file.close
63
+ @file = nil
64
+ end
65
+
66
+ def file_seek(amount, whence = IO::SEEK_SET)
67
+ @file.sysseek(amount, whence)
68
+ end
69
+
70
+ def file_read(amount)
71
+ set_accessed_at
72
+ @file.sysread(amount)
73
+ end
74
+
75
+ def file_open?
76
+ !@file.nil? && !@file.closed?
77
+ end
78
+
79
+ def update_bytes_read(total_bytes_read)
80
+ return if total_bytes_read.nil?
81
+ @bytes_read = total_bytes_read
82
+ end
83
+
84
+ def buffer_extract(data)
85
+ @buffer.extract(data)
86
+ end
87
+
88
+ def update_inode(_inode)
89
+ @inode = _inode
90
+ end
91
+
92
+ def activate
93
+ set_state :active
94
+ end
95
+
96
+ def ignore
97
+ set_state :ignored
98
+ @bytes_read = @filestat.size
99
+ end
100
+
101
+ def close
102
+ set_state :closed
103
+ end
104
+
105
+ def watch
106
+ set_state :watched
107
+ end
108
+
109
+ def unwatch
110
+ set_state :unwatched
111
+ end
112
+
113
+ def active?
114
+ @state == :active
115
+ end
116
+
117
+ def ignored?
118
+ @state == :ignored
119
+ end
120
+
121
+ def closed?
122
+ @state == :closed
123
+ end
124
+
125
+ def watched?
126
+ @state == :watched
127
+ end
128
+
129
+ def unwatched?
130
+ @state == :unwatched
131
+ end
132
+
133
+ def expiry_close_enabled?
134
+ !@close_older.nil?
135
+ end
136
+
137
+ def expiry_ignore_enabled?
138
+ !@ignore_older.nil?
139
+ end
140
+
141
+ def restat
142
+ @filestat = File::Stat.new(path)
143
+ end
144
+
145
+ def set_state(value)
146
+ @state_history << @state
147
+ @state = value
148
+ end
149
+
150
+ def state_history_any?(*previous)
151
+ (@state_history & previous).any?
152
+ end
153
+
154
+ def file_closable?
155
+ file_can_close? && !size_changed?
156
+ end
157
+
158
+ def file_ignorable?
159
+ return false unless expiry_ignore_enabled?
160
+ # (Time.now - stat.mtime) <- in jruby, this does int and float
161
+ # conversions before the subtraction and returns a float.
162
+ # so use all floats upfront
163
+ (Time.now.to_f - filestat.mtime.to_f) > ignore_older
164
+ end
165
+
166
+ def file_can_close?
167
+ return false unless expiry_close_enabled?
168
+ (Time.now.to_f - @accessed_at) > close_older
169
+ end
170
+
171
+ def to_s() inspect; end
172
+ end
173
+ end
@@ -8,39 +8,41 @@ module FileWatch
8
8
  def subscribe(&block)
9
9
  # subscribe(stat_interval = 1, discover_interval = 5, &block)
10
10
  @watch.subscribe(@opts[:stat_interval],
11
- @opts[:discover_interval]) do |event, path|
11
+ @opts[:discover_interval]) do |event, watched_file|
12
+ path = watched_file.path
13
+ file_is_open = watched_file.file_open?
14
+
12
15
  case event
16
+ when :unignore
17
+ _add_to_sincedb(watched_file, event)
13
18
  when :create, :create_initial
14
- if @files.member?(path)
15
- @logger.debug? && @logger.debug("#{event} for #{path}: already exists in @files")
19
+ if file_is_open
20
+ @logger.debug? && @logger.debug("#{event} for #{path}: file already open")
16
21
  next
17
22
  end
18
- if _open_file(path, event)
19
- yield_read_file(path, &block)
23
+ if _open_file(watched_file, event)
24
+ yield_read_file(watched_file, &block)
20
25
  end
21
26
  when :modify
22
- if !@files.member?(path)
23
- @logger.debug? && @logger.debug(":modify for #{path}, does not exist in @files")
24
- if _open_file(path, event)
25
- yield_read_file(path, &block)
27
+ if !file_is_open
28
+ @logger.debug? && @logger.debug(":modify for #{path}, file is not open, opening now")
29
+ if _open_file(watched_file, event)
30
+ yield_read_file(watched_file, &block)
26
31
  end
27
32
  else
28
- yield_read_file(path, &block)
33
+ yield_read_file(watched_file, &block)
29
34
  end
30
35
  when :delete
31
- @logger.debug? && @logger.debug(":delete for: #{path} - closed and deleted from @files")
32
- if @files[path]
33
- yield_read_file(path, &block)
34
- @files[path].close
36
+ if file_is_open
37
+ @logger.debug? && @logger.debug(":delete for #{path}, closing file")
38
+ yield_read_file(watched_file, &block)
39
+ watched_file.file_close
40
+ else
41
+ @logger.debug? && @logger.debug(":delete for #{path}, file already closed")
35
42
  end
36
- @files.delete(path)
37
- @statcache.delete(path)
38
43
  when :timeout
39
- @logger.debug? && @logger.debug(":timeout for: #{path} - closed and deleted from @files")
40
- if (deleted = @files.delete(path))
41
- deleted.close
42
- end
43
- @statcache.delete(path)
44
+ @logger.debug? && @logger.debug(":timeout for #{path}, closing file")
45
+ watched_file.file_close
44
46
  else
45
47
  @logger.warn("unknown event type #{event} for #{path}")
46
48
  end
@@ -48,18 +50,19 @@ module FileWatch
48
50
  end # def subscribe
49
51
 
50
52
  private
51
- def yield_read_file(path, &block)
52
- @buffers[path] ||= FileWatch::BufferedTokenizer.new(@opts[:delimiter])
53
- delimiter_byte_size = @opts[:delimiter].bytesize
53
+ def yield_read_file(watched_file, &block)
54
54
  changed = false
55
55
  loop do
56
56
  begin
57
- data = @files[path].sysread(32768)
57
+ data = watched_file.file_read(32768)
58
58
  changed = true
59
- @buffers[path].extract(data).each do |line|
60
- yield(path, line)
61
- @sincedb[@statcache[path]] += (line.bytesize + delimiter_byte_size)
59
+ watched_file.buffer_extract(data).each do |line|
60
+ yield(watched_file.path, line)
61
+ @sincedb[watched_file.inode] += (line.bytesize + @delimiter_byte_size)
62
62
  end
63
+ # watched_file bytes_read tracks the sincedb entry
64
+ # see TODO in watch.rb
65
+ watched_file.update_bytes_read(@sincedb[watched_file.inode])
63
66
  rescue Errno::EWOULDBLOCK, Errno::EINTR, EOFError
64
67
  break
65
68
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filewatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jordan Sissel
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-01-04 00:00:00.000000000 Z
12
+ date: 2016-01-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -42,6 +42,7 @@ files:
42
42
  - lib/filewatch/tail.rb
43
43
  - lib/filewatch/tail_base.rb
44
44
  - lib/filewatch/watch.rb
45
+ - lib/filewatch/watched_file.rb
45
46
  - lib/filewatch/winhelper.rb
46
47
  - lib/filewatch/yielding_tail.rb
47
48
  - test/filewatch/tail.rb