franz 1.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/franz/sash.rb ADDED
@@ -0,0 +1,81 @@
1
+ require 'thread'
2
+
3
+ module Franz
4
+
5
+ # Sash - A threadsafe hash/array hybrid with access times
6
+ #
7
+ # @example
8
+ # s = Sash.new # => #<Sash...>
9
+ # s.keys # => []
10
+ # s.insert :key, :value # => :value
11
+ # s.get :key # => [:value]
12
+ # s.insert :key, :crazy # => :crazy
13
+ # s.mtime :key # => 2014-02-18 21:24:30 -0800
14
+ # s.flush :key # => [:value, :crazy]
15
+ #
16
+ # Think of it like a Hash where the keys map to "value buffers"
17
class Sash

  # Create a new, empty Sash.
  def initialize
    @mutex = Mutex.new
    # Plain Hash: reading a missing key yields nil without the old
    # default-proc's side effect of inserting the key on every read.
    @mtime = Hash.new
    # Each key maps lazily to its own "value buffer" (a fresh Array per key).
    @hash = Hash.new { |default, key| default[key] = [] }
  end

  # Grab a list of known keys.
  #
  # @return [Array<Object>]
  def keys ; @hash.keys end

  # Insert a value into a key's value buffer and touch its mtime.
  #
  # @param key [Object]
  # @param value [Object]
  #
  # @return [Object] the value
  def insert key, value
    @mutex.synchronize do
      @hash[key] << value
      @mtime[key] = Time.now
    end
    return value
  end

  # Return a key's value buffer.
  #
  # NOTE: this is the live buffer, not a copy; as a side effect, accessing
  # an unknown key creates an empty buffer for it (so it appears in #keys).
  #
  # @param [Object] key
  #
  # @return [Array<Object>]
  def get key ; @hash[key] end

  # Remove and return a key's value buffer. The key's mtime entry is
  # dropped as well, so stale timestamps don't accumulate for dead keys.
  #
  # @param [Object] key
  #
  # @return [Array<Object>] the removed buffer (nil if the key was unknown)
  def remove key
    @mutex.synchronize do
      @mtime.delete key
      @hash.delete key
    end
  end

  # Return the last time the key's value buffer was modified, or nil if
  # the key has never been written.
  #
  # @param [Object] key
  #
  # @return [Time]
  def mtime key ; @mtime[key] end

  # Flush and return a key's value buffer, leaving a fresh empty buffer
  # in its place and touching the key's mtime.
  #
  # @param [Object] key
  #
  # @return [Array<Object>]
  def flush key
    value = nil
    @mutex.synchronize do
      value = @hash[key]
      @hash[key] = []
      @mtime[key] = Time.now
    end
    return value
  end
end
81
+ end
data/lib/franz/tail.rb ADDED
@@ -0,0 +1,191 @@
1
+ require 'thread'
2
+ require 'logger'
3
+
4
+ require 'buftok'
5
+
6
+ module Franz
7
+
8
+ # Tail receives low-level file events from a Watch and handles the actual
9
+ # reading of files, providing a stream of lines.
10
class Tail
  # Hash of byte offsets ("cursors") into each tailed file, keyed by path.
  attr_reader :cursors

  # Start the Tail's three background threads: an evictor (closes idle
  # file handles), a router (drains watch_events into per-path queues),
  # and the tailer (reads file data and emits lines onto tail_events).
  #
  # @param opts [Hash] a complex Hash for tail configuration
  def initialize opts={}
    @watch_events = opts[:watch_events] || []         # input: file events (e.g. from Watch)
    @tail_events = opts[:tail_events] || []           # output: one entry per captured line
    @eviction_interval = opts[:eviction_interval] || 60
    @block_size = opts[:block_size] || 32_768 # 32 KiB read per sysread call
    @spread_size = opts[:spread_size] || 98_304 # 96 KiB max per read before re-queueing
    @cursors = opts[:cursors] || Hash.new             # resume positions, possibly restored state
    @logger = opts[:logger] || Logger.new(STDOUT)

    # Per-path line tokenizer (buftok) that re-assembles partial lines
    # across sysread boundaries.
    @buffer = Hash.new { |h, k| h[k] = BufferedTokenizer.new }
    @file = Hash.new     # path -> open File handle
    @changed = Hash.new  # path -> epoch seconds of last open/read (for eviction)
    @reading = Hash.new  # path -> true while a read is in flight (blocks eviction)
    @stop = false

    @evict_thread = Thread.new do
      log.debug 'starting tail-evict'
      until @stop
        evict
        sleep eviction_interval
      end
      # One final sweep after stop is requested, so idle handles get closed.
      sleep eviction_interval
      evict
    end

    # Per-path event staging: backlog holds re-queued work from oversized
    # reads; incoming holds routed events, bounded to apply backpressure.
    @backlog = Hash.new { |h, k| h[k] = Array.new }
    @incoming = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }

    @watch_thread = Thread.new do
      log.debug 'starting tail-watch'
      until @stop
        e = watch_events.shift
        @incoming[e[:path]].push e
      end
    end

    @tail_thread = Thread.new do
      until @stop
        had_event = false

        # Shuffle so no single busy path starves the others.
        paths = (@backlog.keys + @incoming.keys).uniq.shuffle

        paths.each do |path|
          # Prefer re-queued backlog work; otherwise try a non-blocking
          # pop from the incoming queue (ThreadError means it was empty).
          event = @backlog[path].shift
          begin
            event = @incoming[path].shift(true)
          rescue ThreadError
            next
          end if event.nil?
          had_event = true
          handle event
        end

        # Idle back-off when no path had anything to do.
        sleep 0.05 unless had_event
      end
    end

    log.debug 'started tail'
  end

  # Stop the Tail thread. Effectively only once.
  #
  # The router is killed (it may be blocked in watch_events.shift); the
  # evictor and tailer are joined so in-flight work finishes.
  #
  # @return [Hash] internal "cursors" state
  def stop
    return state if @stop
    @stop = true
    @watch_thread.kill
    @evict_thread.join
    @tail_thread.join
    log.debug 'stopped tail'
    return state
  end

  # Return the internal "cursors" state
  def state
    return @cursors.dup
  end

  private
  attr_reader :watch_events, :tail_events, :eviction_interval, :block_size, :cursors, :file, :buffer, :changed, :reading

  def log ; @logger end

  # Open (or re-open) a file, seeking to the saved cursor position.
  # NOTE: shadows Kernel#open inside this class; File.open is called
  # explicitly below, so there is no recursion.
  #
  # @return [Boolean] false if the file vanished (Errno::ENOENT)
  def open path
    return true unless file[path].nil?
    pos = @cursors.include?(path) ? @cursors[path] : 0
    begin
      file[path] = File.open(path)
      file[path].sysseek pos, IO::SEEK_SET
      @cursors[path] = pos
      @changed[path] = Time.now.to_i
    rescue Errno::ENOENT
      return false
    end
    log.debug 'opened: path=%s' % path.inspect
    return true
  end

  # Read a file up to the given size, emitting complete lines onto
  # tail_events and advancing the cursor as data is consumed.
  def read path, size
    @reading[path] = true

    bytes_read = 0
    loop do
      begin
        break if file[path].pos >= size
      rescue NoMethodError
        # file[path] was nil (not open / evicted): try to (re)open it.
        break unless open(path)
        break if file[path].pos >= size
      end

      # Fairness: after spread_size bytes, re-queue the remainder on the
      # backlog so other paths get a turn.
      if bytes_read >= @spread_size
        @backlog[path].push name: :appended, path: path, size: size
        break
      end

      begin
        data = file[path].sysread @block_size
        buffer[path].extract(data).each do |line|
          # NOTE(review): stdlib Logger has no #trace — presumably a
          # custom logger with #trace is injected; with the default
          # Logger.new(STDOUT) this line would raise NoMethodError.
          # TODO confirm.
          log.trace 'captured: path=%s line=%s' % [ path, line ]
          tail_events.push path: path, line: line
        end
      rescue EOFError, Errno::ENOENT
        # we're done here
      end

      # Advance the cursor by however far the file position moved.
      last_pos = @cursors[path]
      @cursors[path] = file[path].pos
      bytes_read += @cursors[path] - last_pos
    end

    log.trace 'read: path=%s size=%s' % [ path.inspect, size.inspect ]
    @changed[path] = Time.now.to_i
    @reading.delete path
  end

  # Close a file and drop all per-path state (cursor, change time).
  def close path
    @reading[path] = true # prevent evict from interrupting
    file.delete(path).close if file.include? path
    @cursors.delete(path)
    @changed.delete(path)
    @reading.delete(path)
    log.debug 'closed: path=%s' % path.inspect
  end

  # Close handles for paths idle longer than eviction_interval and with
  # no pending work; cursors are kept so reads can resume later.
  def evict
    file.keys.each do |path|
      next if @reading[path]
      next unless @changed[path] < Time.now.to_i - eviction_interval
      next unless file.include? path
      next unless @incoming[path].empty?
      next unless @backlog[path].empty?
      file.delete(path).close
      log.debug 'evicted: path=%s' % path.inspect
    end
  end

  # Dispatch one watch event. :created is a deliberate no-op (the file is
  # opened lazily on first read); :replaced/:truncated restart from a
  # fresh handle; unknown event names are a programming error.
  def handle event
    log.trace 'handle: event=%s' % event.inspect
    case event[:name]
    when :created
    when :replaced
      close event[:path]
      read event[:path], event[:size]
    when :truncated
      close event[:path]
      read event[:path], event[:size]
    when :appended
      read event[:path], event[:size]
    when :deleted
      close event[:path]
    else
      raise 'invalid event'
    end
  end
end
191
+ end
@@ -0,0 +1,68 @@
1
+ require 'thread'
2
+ require 'logger'
3
+
4
+ require 'consistent_hashing'
5
+
6
+ module Franz
7
+
8
+ # TailPool creates a consistently-hashed pool of Tails.
9
class TailPool
  # Start a new TailPool thread in the background.
  #
  # @param opts [Hash] a complex Hash for configuration
  # @option opts [Integer] :size (5) number of Tails in the pool
  # @option opts [Queue] :watch_events ([]) "input" queue of file events
  # @option opts [Queue] :tail_events ([]) "output" queue of captured lines
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
  def initialize opts={}
    @size = opts[:size] || 5
    @watch_events = opts[:watch_events] || []
    @tail_events = opts[:tail_events] || []
    @logger = opts[:logger] || Logger.new(STDOUT)

    @tails = []
    @ring = ConsistentHashing::Ring.new
    # One bounded queue per Tail; the ring hashes each path onto a queue,
    # so a given file is always handled by the same Tail.
    @events = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }

    @size.times do |i|
      log.debug 'starting tail_pool-tail #%d' % i
      @ring << @events[i]
      @tails << Franz::Tail.new(opts.merge({
        watch_events: @events[i],
        tail_events: @tail_events
      }))
    end

    @stop = false

    # Router: drain the shared watch_events queue and fan events out to
    # the Tail responsible for each path.
    @in_thread = Thread.new do
      log.debug 'starting tail_pool-watch'
      until @stop
        e = @watch_events.shift
        q = @ring.node_for e[:path]
        q.push e
      end
    end

    log.debug 'started tail_pool'
  end

  # Stop the TailPool thread. Effectively only once.
  #
  # @return [Hash] internal "cursors" state merged from every Tail
  def stop
    return state if @stop
    @stop = true
    # Kill rather than join: the router may be blocked in
    # watch_events.shift and would otherwise never observe @stop.
    @in_thread.kill
    @tails.map(&:stop)
    log.debug 'stopped tail_pool'
    return state
  end

  # Return the internal "cursors" state
  def state
    @tails.map(&:state).reduce(&:merge)
  end

  private
  attr_reader :watch_events, :tail_events, :size

  def log ; @logger end
end
68
+ end
@@ -0,0 +1,180 @@
1
+ require 'logger'
2
+
3
+ module Franz
4
+
5
+ # Watch works in tandem with Discover to maintain a list of known files and
6
+ # their status. Events are generated when a file is created, destroyed, or
7
+ # modified (including appended, truncated, and replaced).
8
class Watch

  # Start a new Watch thread in the background.
  #
  # @param [Hash] opts options for the watch
  # @option opts [Queue] :discoveries ([]) "input" queue of discovered paths
  # @option opts [Queue] :deletions ([]) "output" queue of deleted paths
  # @option opts [Queue] :watch_events ([]) "output" queue of file events
  # @option opts [Fixnum] :watch_interval (10) seconds between watch rounds
  # @option opts [Hash<Path,State>] :stats ({}) internal "stats" state
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
  def initialize opts={}
    @discoveries = opts[:discoveries] || []
    @deletions = opts[:deletions] || []
    @watch_events = opts[:watch_events] || []
    @watch_interval = opts[:watch_interval] || 10
    @stats = opts[:stats] || Hash.new
    @logger = opts[:logger] || Logger.new(STDOUT)

    # Need to resend old events to make sure Tail catches up
    stats.each do |path, old_stat|
      watch_events.push name: :appended, path: path, size: old_stat[:size]
    end

    @stop = false

    @thread = Thread.new do
      log.debug 'starting watch-discover'
      until @stop
        # Adopt newly-discovered paths with a nil stat so the next watch
        # round reports them as :created.
        until discoveries.empty?
          d = discoveries.pop
          @stats[d] = nil
        end
        watch.each do |deleted|
          @stats.delete deleted
          deletions.push deleted
        end
        sleep watch_interval
      end
    end

    log.debug 'started watch'
  end

  # Stop the Watch thread. Effectively only once.
  #
  # @return [Hash] internal "stats" state
  def stop
    return state if @stop
    @stop = true
    @thread.join
    log.debug 'stopped watch'
    return state
  end

  # Return the internal "stats" state
  def state
    return @stats.dup
  end

  private
  attr_reader :discoveries, :deletions, :watch_events, :watch_interval, :stats

  def log ; @logger end

  # Push a single event onto the watch_events queue.
  def enqueue name, path, size=nil
    message = 'enqueue: name=%s path=%s size=%s' % [
      name.inspect, path.inspect, size.inspect
    ]
    # Ruby's standard Logger has no #trace; fall back to #debug so the
    # default Logger.new(STDOUT) doesn't raise NoMethodError here.
    log.respond_to?(:trace) ? log.trace(message) : log.debug(message)
    watch_events.push name: name, path: path, size: size
  end

  # Stat every known path once, enqueue events for any observed change,
  # and return the list of paths that disappeared this round.
  def watch
    deleted = []
    stats.keys.each do |path|
      old_stat = stats[path]
      stat = stat_for path
      stats[path] = stat

      if file_created? old_stat, stat
        enqueue :created, path
      elsif file_deleted? old_stat, stat
        enqueue :deleted, path
        deleted << path
      end

      if file_replaced? old_stat, stat
        enqueue :replaced, path, stat[:size]
      elsif file_appended? old_stat, stat
        enqueue :appended, path, stat[:size]
      elsif file_truncated? old_stat, stat
        enqueue :truncated, path, stat[:size]
      end
    end
    return deleted
  end

  # Perform a file stat and return a simplified version (nil if the file
  # does not exist).
  #
  # @param path [String] file path to examine
  def stat_for path
    return begin
      stat = File::Stat.new(path)
      {
        inode: {
          ino: stat.ino,
          maj: stat.dev_major,
          min: stat.dev_minor
        },
        size: stat.size
      }
    rescue Errno::ENOENT
      nil
    end
  end

  # Grab only the inode from a stat (or nil if the stat is nil).
  #
  # @param stat [Stat] stat to inspect
  def inode_for stat
    return nil if stat.nil?
    return stat[:inode].to_a
  end

  # Detect whether the file was created.
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_created? old_stat, new_stat
    return !new_stat.nil? && old_stat.nil?
  end

  # Detect whether the file was deleted.
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_deleted? old_stat, new_stat
    return new_stat.nil? && !old_stat.nil?
  end

  # Detect whether the file was replaced (e.g. inode changed).
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_replaced? old_stat, new_stat
    return false if new_stat.nil?
    return false if old_stat.nil?
    return inode_for(new_stat) != inode_for(old_stat)
  end

  # Detect whether the file was truncated (e.g. rotated).
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_truncated? old_stat, new_stat
    return false if new_stat.nil?
    return false if old_stat.nil?
    return new_stat[:size] < old_stat[:size]
  end

  # Detect whether the file was appended.
  #
  # @param old_stat [Stat] stat before some change
  # @param new_stat [Stat] stat after some change
  def file_appended? old_stat, new_stat
    return false if new_stat.nil?
    return new_stat[:size] > 0 if old_stat.nil?
    return new_stat[:size] > old_stat[:size]
  end
end
180
+ end