franz 1.2.7

Sign up to get free protection for your applications and to get access to all the features.
data/lib/franz/sash.rb ADDED
@@ -0,0 +1,81 @@
1
+ require 'thread'
2
+
3
+ module Franz
4
+
5
# Sash - A threadsafe hash/array hybrid with access times
#
# @example
#   s = Sash.new           # => #<Sash...>
#   s.keys                 # => []
#   s.insert :key, :value  # => :value
#   s.get :key             # => [:value]
#   s.insert :key, :crazy  # => :crazy
#   s.mtime :key           # => 2014-02-18 21:24:30 -0800
#   s.flush :key           # => [:value, :crazy]
#
# Think of it like a Hash where the keys map to "value buffers"
#
# Every public method takes the same internal mutex, so reads never observe
# (or race with) a half-finished insert/flush from another thread.
class Sash

  # Create a new, empty Sash.
  def initialize
    @mutex = Mutex.new
    # Plain Hash: an unknown key reads as nil without being stored.
    @mtime = {}
    # Unknown keys get a fresh, per-key empty buffer on first access.
    @hash  = Hash.new { |buffers, key| buffers[key] = [] }
  end

  # Grab a list of known keys.
  #
  # @return [Array<Object>]
  def keys
    @mutex.synchronize { @hash.keys }
  end

  # Insert a value into a key's value buffer.
  #
  # @param key [Object]
  # @param value [Object]
  #
  # @return [Object] the value
  def insert key, value
    @mutex.synchronize do
      @hash[key] << value
      @mtime[key] = Time.now
    end
    return value
  end

  # Return a key's value buffer.
  #
  # The returned Array is the live internal buffer, not a copy. Asking for an
  # unknown key creates (and returns) an empty buffer for it, so the key will
  # subsequently show up in {#keys}.
  #
  # @param [Object] key
  #
  # @return [Array<Object>]
  def get key
    @mutex.synchronize { @hash[key] }
  end

  # Remove and return a key's value buffer.
  #
  # The key's modification time is left untouched.
  #
  # @param [Object] key
  #
  # @return [Array<Object>, nil] the buffer, or nil if the key was unknown
  def remove key
    @mutex.synchronize { @hash.delete(key) }
  end

  # Return the last time the key's value buffer was modified.
  #
  # @param [Object] key
  #
  # @return [Time, nil] nil if the key has never been inserted or flushed
  def mtime key
    @mutex.synchronize { @mtime[key] }
  end

  # Flush and return a key's value buffer.
  #
  # The key stays known afterwards, with a fresh empty buffer and an updated
  # modification time.
  #
  # @param [Object] key
  #
  # @return [Array<Object>]
  def flush key
    @mutex.synchronize do
      flushed = @hash[key]
      @hash[key] = []
      @mtime[key] = Time.now
      flushed
    end
  end
end
81
+ end
data/lib/franz/tail.rb ADDED
@@ -0,0 +1,191 @@
1
+ require 'thread'
2
+ require 'logger'
3
+
4
+ require 'buftok'
5
+
6
+ module Franz
7
+
8
# Tail receives low-level file events from a Watch and handles the actual
# reading of files, providing a stream of lines.
#
# NOTE(review): the logger must respond to #trace as well as #debug. The
# stdlib Logger does not define #trace, so presumably the project adds it
# elsewhere - confirm.
class Tail
  # @return [Hash] live map of path => byte offset read so far
  attr_reader :cursors

  # Start a new Tail thread in the background.
  #
  # @param opts [Hash] a complex Hash for tail configuration
  # @option opts [#shift] :watch_events ([]) "input" queue of file events;
  #   each event is a Hash with :name, :path and (for reads) :size
  # @option opts [#push] :tail_events ([]) "output" queue; receives one
  #   +{path:, line:}+ Hash per captured line
  # @option opts [Numeric] :eviction_interval (60) seconds a file may sit
  #   idle before its handle is closed
  # @option opts [Integer] :block_size (32_768) bytes requested per sysread
  # @option opts [Integer] :spread_size (98_304) bytes one read may consume
  #   before the remainder is deferred to the path's backlog
  # @option opts [Hash] :cursors ({}) internal "cursors" state to resume from
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
  def initialize opts={}
    @watch_events      = opts[:watch_events]      || []
    @tail_events       = opts[:tail_events]       || []
    @eviction_interval = opts[:eviction_interval] || 60
    @block_size        = opts[:block_size]        || 32_768 # 32 KiB
    @spread_size       = opts[:spread_size]       || 98_304 # 96 KiB
    @cursors           = opts[:cursors]           || Hash.new
    @logger            = opts[:logger]            || Logger.new(STDOUT)

    @buffer  = Hash.new { |h, k| h[k] = BufferedTokenizer.new }
    @file    = Hash.new
    @changed = Hash.new
    @reading = Hash.new
    @stop    = false

    # Created before any thread starts: evict and the tail loop both use them.
    @backlog  = Hash.new { |h, k| h[k] = Array.new }
    @incoming = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }

    @evict_thread = Thread.new do
      log.debug 'starting tail-evict'
      until @stop
        evict
        sleep eviction_interval
      end
      # Once a stop is requested: wait one more interval, then evict one last time.
      sleep eviction_interval
      evict
    end

    # Fans incoming events out into one bounded queue per path.
    @watch_thread = Thread.new do
      log.debug 'starting tail-watch'
      until @stop
        e = watch_events.shift
        @incoming[e[:path]].push e
      end
    end

    @tail_thread = Thread.new do
      until @stop
        had_event = false

        # Handle at most one event per path per round, in shuffled order
        # (presumably so that no path is consistently served first).
        paths = (@backlog.keys + @incoming.keys).uniq.shuffle

        paths.each do |path|
          # Deferred work for a path takes priority over new events.
          event = @backlog[path].shift
          if event.nil?
            begin
              event = @incoming[path].shift(true) # non-blocking
            rescue ThreadError
              next # nothing queued for this path
            end
          end
          had_event = true
          handle event
        end

        sleep 0.05 unless had_event
      end
    end

    log.debug 'started tail'
  end

  # Stop the Tail thread. Effectively only once.
  #
  # Kills the fan-out thread, then waits for the evict and tail threads to
  # finish on their own.
  #
  # @return [Hash] internal "cursors" state
  def stop
    return state if @stop
    @stop = true
    @watch_thread.kill
    @evict_thread.join
    @tail_thread.join
    log.debug 'stopped tail'
    return state
  end

  # Return the internal "cursors" state
  #
  # @return [Hash] shallow copy of the path => offset map
  def state
    return @cursors.dup
  end

private
  # :cursors is deliberately absent here: redeclaring it below `private`
  # would turn the public reader declared at the top of the class private.
  attr_reader :watch_events, :tail_events, :eviction_interval, :block_size, :file, :buffer, :changed, :reading

  def log ; @logger end

  # Ensure a handle for path is open and positioned at its cursor.
  #
  # @param path [String] file to open
  #
  # @return [Boolean] false if the file does not exist, true otherwise
  def open path
    return true unless file[path].nil?
    pos = @cursors.fetch(path, 0)
    begin
      file[path] = File.open(path)
      file[path].sysseek pos, IO::SEEK_SET
      @cursors[path] = pos
      @changed[path] = Time.now.to_i
    rescue Errno::ENOENT
      return false
    end
    log.debug 'opened: path=%s' % path.inspect
    return true
  end

  # Read path from its cursor up to byte offset size, emitting every complete
  # line to tail_events.
  #
  # @param path [String] file to read
  # @param size [Integer] file size reported by the triggering event
  def read path, size
    @reading[path] = true

    bytes_read = 0
    loop do
      break unless open(path) # no-op when the handle is already open
      break if file[path].pos >= size

      if bytes_read >= @spread_size
        # Used up this call's budget: queue the remainder for a later round.
        @backlog[path].push name: :appended, path: path, size: size
        break
      end

      exhausted = false
      begin
        data = file[path].sysread @block_size
        buffer[path].extract(data).each do |line|
          log.trace 'captured: path=%s line=%s' % [ path, line ]
          tail_events.push path: path, line: line
        end
      rescue EOFError, Errno::ENOENT
        # The file is shorter than the event claimed (e.g. it shrank since
        # the stat). Nothing more can be read, so leave the loop instead of
        # retrying forever.
        exhausted = true
      end

      last_pos = @cursors[path]
      @cursors[path] = file[path].pos
      bytes_read += @cursors[path] - last_pos
      break if exhausted
    end

    log.trace 'read: path=%s size=%s' % [ path.inspect, size.inspect ]
    @changed[path] = Time.now.to_i
    @reading.delete path
  end

  # Close path's handle (if any) and forget its cursor, so the next read
  # starts from the beginning of the file.
  #
  # @param path [String] file to close
  def close path
    @reading[path] = true # prevent evict from interrupting
    file.delete(path).close if file.include? path
    @cursors.delete(path)
    @changed.delete(path)
    @reading.delete(path)
    log.debug 'closed: path=%s' % path.inspect
  end

  # Close handles that have been idle for longer than eviction_interval and
  # have no queued work. Cursors are kept, so a later read resumes in place.
  def evict
    file.keys.each do |path|
      next if @reading[path]
      changed_at = @changed[path]
      next if changed_at.nil? # closed by the tail thread since file.keys was taken
      next unless changed_at < Time.now.to_i - eviction_interval
      next unless file.include? path
      next unless @incoming[path].empty?
      next unless @backlog[path].empty?
      io = file.delete(path)
      next if io.nil?
      io.close
      log.debug 'evicted: path=%s' % path.inspect
    end
  end

  # Dispatch a single file event.
  #
  # @param event [Hash] event with :name, :path and (for reads) :size
  #
  # @raise [RuntimeError] if the event name is not recognized
  def handle event
    log.trace 'handle: event=%s' % event.inspect
    case event[:name]
    when :created
      # nothing to do for a bare creation
    when :replaced, :truncated
      # Contents before the cursor are no longer valid: start over.
      close event[:path]
      read event[:path], event[:size]
    when :appended
      read event[:path], event[:size]
    when :deleted
      close event[:path]
    else
      raise 'invalid event'
    end
  end
end
191
+ end
@@ -0,0 +1,68 @@
1
+ require 'thread'
2
+ require 'logger'
3
+
4
+ require 'consistent_hashing'
5
+
6
+ module Franz
7
+
8
# TailPool creates a consistently-hashed pool of Tails.
#
# Each incoming watch event is routed, by its :path, to the input queue of
# one Tail in the pool.
class TailPool
  # Start a new TailPool thread in the background.
  #
  # Any option not listed here is passed through unchanged to every
  # Franz::Tail in the pool.
  #
  # @param opts [Hash] a complex Hash for configuration
  # @option opts [Integer] :size (5) number of Tails in the pool
  # @option opts [#shift] :watch_events ([]) "input" queue of file events
  # @option opts [#push] :tail_events ([]) "output" queue shared by all Tails
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
  def initialize opts={}
    @size         = opts[:size]         || 5
    @watch_events = opts[:watch_events] || []
    @tail_events  = opts[:tail_events]  || []
    @logger       = opts[:logger]       || Logger.new(STDOUT)

    @tails  = []
    @ring   = ConsistentHashing::Ring.new
    @events = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }

    @size.times do |i|
      log.debug 'starting tail_pool-tail #%d' % i
      # The ring's nodes are the per-Tail input queues themselves.
      @ring << @events[i]
      @tails << Franz::Tail.new(opts.merge({
        watch_events: @events[i],
        tail_events: @tail_events
      }))
    end

    @stop = false

    @in_thread = Thread.new do
      log.debug 'starting tail_pool-watch'
      until @stop
        e = @watch_events.shift
        q = @ring.node_for e[:path]
        q.push e
      end
    end

    log.debug 'started tail_pool'
  end

  # Stop the TailPool thread. Effectively only once.
  #
  # @return [Hash] internal "cursors" state
  def stop
    return state if @stop
    @stop = true
    # The router is normally blocked inside watch_events.shift and so never
    # re-checks @stop; kill it, as Tail#stop does with its own input thread.
    @in_thread.kill
    @tails.map(&:stop)
    log.debug 'stopped tail_pool'
    return state
  end

  # Return the internal "cursors" state
  #
  # @return [Hash] the cursors of every Tail merged together ({} if the pool
  #   is empty)
  def state
    @tails.map(&:state).reduce({}, :merge)
  end

private
  attr_reader :watch_events, :tail_events, :size

  def log ; @logger end
end
68
+ end
@@ -0,0 +1,180 @@
1
+ require 'logger'
2
+
3
+ module Franz
4
+
5
# Watch works in tandem with Discover to maintain a list of known files and
# their status. Events are generated when a file is created, destroyed, or
# modified (including appended, truncated, and replaced).
#
# NOTE(review): the logger must respond to #trace as well as #debug. The
# stdlib Logger does not define #trace, so presumably the project adds it
# elsewhere - confirm.
class Watch

  # Start a new Watch thread in the background.
  #
  # @param [Hash] opts options for the watch
  # @option opts [Queue] :discoveries ([]) "input" queue of discovered paths
  # @option opts [Queue] :deletions ([]) "output" queue of deleted paths
  # @option opts [Queue] :watch_events ([]) "output" queue of file events
  # @option opts [Numeric] :watch_interval (10) seconds between watch rounds
  # @option opts [Hash<Path,State>] :stats ({}) internal "stats" state
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
  def initialize opts={}
    @discoveries    = opts[:discoveries]    || []
    @deletions      = opts[:deletions]      || []
    @watch_events   = opts[:watch_events]   || []
    @watch_interval = opts[:watch_interval] || 10
    @stats          = opts[:stats]          || Hash.new
    @logger         = opts[:logger]         || Logger.new(STDOUT)

    # Need to resend old events to make sure Tail catches up
    stats.each do |path, old_stat|
      # A nil stat marks a path that was discovered but never seen on disk
      # (see the discovery loop below); there is nothing to replay for it.
      next if old_stat.nil?
      watch_events.push name: :appended, path: path, size: old_stat[:size]
    end

    @stop = false

    @thread = Thread.new do
      log.debug 'starting watch-discover'
      until @stop
        # Register newly discovered paths with "no stat yet".
        until discoveries.empty?
          d = discoveries.pop
          @stats[d] = nil
        end
        # Forget deleted paths and report them downstream.
        watch.each do |deleted|
          @stats.delete deleted
          deletions.push deleted
        end
        sleep watch_interval
      end
    end

    log.debug 'started watch'
  end

  # Stop the Watch thread. Effectively only once.
  #
  # Waits for the current round (including its sleep) to finish.
  #
  # @return [Hash] internal "stats" state
  def stop
    return state if @stop
    @stop = true
    @thread.join
    log.debug 'stopped watch'
    return state
  end

  # Return the internal "stats" state
  #
  # @return [Hash] shallow copy of the path => stat map; values may be nil
  def state
    return @stats.dup
  end

private
  attr_reader :discoveries, :deletions, :watch_events, :watch_interval, :stats

  def log ; @logger end

  # Push a single file event onto the output queue.
  #
  # @param name [Symbol] event name
  # @param path [String] file the event refers to
  # @param size [Integer, nil] current file size, when relevant
  def enqueue name, path, size=nil
    log.trace 'enqueue: name=%s path=%s size=%s' % [
      name.inspect, path.inspect, size.inspect
    ]
    watch_events.push name: name, path: path, size: size
  end

  # Run one watch round: re-stat every known path, record the new stat and
  # enqueue an event for each change detected.
  #
  # @return [Array<String>] paths found to have been deleted this round
  def watch
    deleted = []
    stats.keys.each do |path|
      old_stat = stats[path]
      stat = stat_for path
      stats[path] = stat

      if file_created? old_stat, stat
        enqueue :created, path
      elsif file_deleted? old_stat, stat
        enqueue :deleted, path
        deleted << path
      end

      # Checked independently of the above: a newly created, non-empty file
      # yields :created followed by :appended.
      if file_replaced? old_stat, stat
        enqueue :replaced, path, stat[:size]
      elsif file_appended? old_stat, stat
        enqueue :appended, path, stat[:size]
      elsif file_truncated? old_stat, stat
        enqueue :truncated, path, stat[:size]
      end
    end
    return deleted
  end

  # Perform a file stat and return a simplified version.
  #
  # @param path [String] file path to examine
  #
  # @return [Hash, nil] +{inode: {ino:, maj:, min:}, size:}+, or nil if the
  #   file does not exist
  def stat_for path
    stat = File::Stat.new(path)
    return {
      inode: {
        ino: stat.ino,
        maj: stat.dev_major,
        min: stat.dev_minor
      },
      size: stat.size
    }
  rescue Errno::ENOENT
    return nil
  end

  # Grab only the inode from a stat (or nil if the stat is nil).
  #
  # @param stat [Stat] stat to inspect
  def inode_for stat
    return nil if stat.nil?
    return stat[:inode].to_a
  end

  # Detect whether the file was created.
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_created? old_stat, new_stat
    return !new_stat.nil? && old_stat.nil?
  end

  # Detect whether the file was deleted.
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_deleted? old_stat, new_stat
    return new_stat.nil? && !old_stat.nil?
  end

  # Detect whether the file was replaced (e.g. inode changed).
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_replaced? old_stat, new_stat
    return false if new_stat.nil?
    return false if old_stat.nil?
    return inode_for(new_stat) != inode_for(old_stat)
  end

  # Detect whether the file was truncated (e.g. rotated).
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_truncated? old_stat, new_stat
    return false if new_stat.nil?
    return false if old_stat.nil?
    return new_stat[:size] < old_stat[:size]
  end

  # Detect whether the file was appended.
  #
  # A file with no previous stat counts as appended when it is non-empty.
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_appended? old_stat, new_stat
    return false if new_stat.nil?
    return new_stat[:size] > 0 if old_stat.nil?
    return new_stat[:size] > old_stat[:size]
  end
end
180
+ end