franz 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +15 -0
- data/LICENSE +13 -0
- data/Rakefile +52 -0
- data/Readme.md +50 -0
- data/VERSION +1 -0
- data/bin/franz +81 -0
- data/franz.gemspec +25 -0
- data/lib/franz.rb +10 -0
- data/lib/franz/agg.rb +158 -0
- data/lib/franz/config.rb +24 -0
- data/lib/franz/discover.rb +108 -0
- data/lib/franz/input.rb +174 -0
- data/lib/franz/logger.rb +66 -0
- data/lib/franz/metadata.rb +33 -0
- data/lib/franz/output.rb +81 -0
- data/lib/franz/sash.rb +81 -0
- data/lib/franz/tail.rb +191 -0
- data/lib/franz/tail_pool.rb +68 -0
- data/lib/franz/watch.rb +180 -0
- data/test/test_franz_agg.rb +97 -0
- data/test/test_franz_discover.rb +88 -0
- data/test/test_franz_tail.rb +132 -0
- data/test/test_franz_watch.rb +144 -0
- metadata +155 -0
data/lib/franz/sash.rb
ADDED
@@ -0,0 +1,81 @@
require 'thread'

module Franz

  # Sash - A threadsafe hash/array hybrid with access times
  #
  # @example
  #   s = Sash.new          # => #<Sash...>
  #   s.keys                # => []
  #   s.insert :key, :value # => value
  #   s.get :key            # => [:value]
  #   s.insert :key, :crazy # => :crazy
  #   s.mtime :key          # => 2014-02-18 21:24:30 -0800
  #   s.flush :key          # => [:value, :crazy]
  #
  # Think of it like a Hash where the keys map to "value buffers"
  class Sash

    # Create a new, empty Sash.
    def initialize
      @mutex = Mutex.new
      @mtime = Hash.new { |default, key| default[key] = nil }
      @hash  = Hash.new { |default, key| default[key] = [] }
    end

    # Grab a list of known keys.
    #
    # @return [Array<Object>]
    def keys ; @hash.keys end

    # Insert a value into a key's value buffer.
    #
    # @param key [Object]
    # @param value [Object]
    #
    # @return [Object] the value
    def insert key, value
      @mutex.synchronize do
        @hash[key] << value
        @mtime[key] = Time.now
      end
      return value
    end

    # Return a key's value buffer.
    #
    # @param [Object] key
    #
    # @return [Array<Object>]
    def get key ; @hash[key] end

    # Remove and return a key's value buffer.
    #
    # @param [Object] key
    #
    # @return [Array<Object>]
    def remove key ; @hash.delete(key) end

    # Return the last time the key's value buffer was modified.
    #
    # @param [Object] key
    #
    # @return [Time]
    def mtime key ; @mtime[key] end

    # Flush and return a key's value buffer.
    #
    # @param [Object] key
    #
    # @return [Array<Object>]
    def flush key
      value = nil
      @mutex.synchronize do
        value = @hash[key]
        @hash[key]  = []
        @mtime[key] = Time.now
      end
      return value
    end
  end
end
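
The class documents its basic API in the @example above; the sketch below is illustrative only (not part of the packaged files) and exercises the thread-safety that Sash exists for: several producers insert into one key while a consumer flushes the buffer atomically.

require 'franz/sash'

sash = Franz::Sash.new

# Four producers append to the same key concurrently; #insert holds the mutex.
producers = 4.times.map do |i|
  Thread.new { 100.times { |n| sash.insert :lines, "producer #{i} line #{n}" } }
end
producers.each(&:join)

batch = sash.flush :lines   # atomically swaps in a fresh, empty buffer
batch.size                  # => 400
sash.get :lines             # => []
sash.mtime :lines           # => Time of the flush
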
data/lib/franz/tail.rb
ADDED
@@ -0,0 +1,191 @@
require 'thread'
require 'logger'

require 'buftok'

module Franz

  # Tail receives low-level file events from a Watch and handles the actual
  # reading of files, providing a stream of lines.
  class Tail
    attr_reader :cursors

    # Start a new Tail thread in the background.
    #
    # @param opts [Hash] a complex Hash for tail configuration
    def initialize opts={}
      @watch_events      = opts[:watch_events]      || []
      @tail_events       = opts[:tail_events]       || []
      @eviction_interval = opts[:eviction_interval] || 60
      @block_size        = opts[:block_size]        || 32_768 # 32 KiB
      @spread_size       = opts[:spread_size]       || 98_304 # 96 KiB
      @cursors           = opts[:cursors]           || Hash.new
      @logger            = opts[:logger]            || Logger.new(STDOUT)

      @buffer  = Hash.new { |h, k| h[k] = BufferedTokenizer.new }
      @file    = Hash.new
      @changed = Hash.new
      @reading = Hash.new
      @stop    = false

      @evict_thread = Thread.new do
        log.debug 'starting tail-evict'
        until @stop
          evict
          sleep eviction_interval
        end
        sleep eviction_interval
        evict
      end

      @backlog  = Hash.new { |h, k| h[k] = Array.new }
      @incoming = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }

      @watch_thread = Thread.new do
        log.debug 'starting tail-watch'
        until @stop
          e = watch_events.shift
          @incoming[e[:path]].push e
        end
      end

      @tail_thread = Thread.new do
        until @stop
          had_event = false

          paths = (@backlog.keys + @incoming.keys).uniq.shuffle

          paths.each do |path|
            event = @backlog[path].shift
            begin
              event = @incoming[path].shift(true)
            rescue ThreadError
              next
            end if event.nil?
            had_event = true
            handle event
          end

          sleep 0.05 unless had_event
        end
      end

      log.debug 'started tail'
    end

    # Stop the Tail thread. Effectively only once.
    #
    # @return [Hash] internal "cursors" state
    def stop
      return state if @stop
      @stop = true
      @watch_thread.kill
      @evict_thread.join
      @tail_thread.join
      log.debug 'stopped tail'
      return state
    end

    # Return the internal "cursors" state
    def state
      return @cursors.dup
    end

    private
    attr_reader :watch_events, :tail_events, :eviction_interval, :block_size, :cursors, :file, :buffer, :changed, :reading

    def log ; @logger end

    def open path
      return true unless file[path].nil?
      pos = @cursors.include?(path) ? @cursors[path] : 0
      begin
        file[path] = File.open(path)
        file[path].sysseek pos, IO::SEEK_SET
        @cursors[path] = pos
        @changed[path] = Time.now.to_i
      rescue Errno::ENOENT
        return false
      end
      log.debug 'opened: path=%s' % path.inspect
      return true
    end

    def read path, size
      @reading[path] = true

      bytes_read = 0
      loop do
        begin
          break if file[path].pos >= size
        rescue NoMethodError
          break unless open(path)
          break if file[path].pos >= size
        end

        if bytes_read >= @spread_size
          @backlog[path].push name: :appended, path: path, size: size
          break
        end

        begin
          data = file[path].sysread @block_size
          buffer[path].extract(data).each do |line|
            log.trace 'captured: path=%s line=%s' % [ path, line ]
            tail_events.push path: path, line: line
          end
        rescue EOFError, Errno::ENOENT
          # we're done here
        end

        last_pos = @cursors[path]
        @cursors[path] = file[path].pos
        bytes_read += @cursors[path] - last_pos
      end

      log.trace 'read: path=%s size=%s' % [ path.inspect, size.inspect ]
      @changed[path] = Time.now.to_i
      @reading.delete path
    end

    def close path
      @reading[path] = true # prevent evict from interrupting
      file.delete(path).close if file.include? path
      @cursors.delete(path)
      @changed.delete(path)
      @reading.delete(path)
      log.debug 'closed: path=%s' % path.inspect
    end

    def evict
      file.keys.each do |path|
        next if @reading[path]
        next unless @changed[path] < Time.now.to_i - eviction_interval
        next unless file.include? path
        next unless @incoming[path].empty?
        next unless @backlog[path].empty?
        file.delete(path).close
        log.debug 'evicted: path=%s' % path.inspect
      end
    end

    def handle event
      log.trace 'handle: event=%s' % event.inspect
      case event[:name]
      when :created
      when :replaced
        close event[:path]
        read event[:path], event[:size]
      when :truncated
        close event[:path]
        read event[:path], event[:size]
      when :appended
        read event[:path], event[:size]
      when :deleted
        close event[:path]
      else
        raise 'invalid event'
      end
    end
  end
end
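
A rough sketch of driving a Tail by hand with in-memory queues (illustrative only, not part of the packaged files). It assumes the gem and its buftok dependency are installed; the log-file path is hypothetical, and because Tail logs captured lines at a trace level, the sketch aliases trace to debug on a stock Logger instead of pulling in the gem's own Franz::Logger.

require 'logger'
require 'franz/tail'

# Tail calls log.trace for captured lines; stock Logger has no #trace, so add one.
log = Logger.new(STDOUT)
def log.trace(*args) ; debug(*args) end

watch_events = Queue.new   # normally fed by a Watch
tail_events  = Queue.new   # receives { path: ..., line: ... } hashes

tail = Franz::Tail.new(
  watch_events:      watch_events,
  tail_events:       tail_events,
  eviction_interval: 1,     # keep shutdown quick for this sketch
  logger:            log
)

path = '/tmp/franz_tail_example.log'   # hypothetical path
File.write path, "hello, world\n"
watch_events.push name: :appended, path: path, size: File.size(path)

p tail_events.shift   # => { path: "...", line: "hello, world" }
cursors = tail.stop   # per-path byte offsets, suitable for checkpointing
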
data/lib/franz/tail_pool.rb
ADDED
@@ -0,0 +1,68 @@
require 'thread'
require 'logger'

require 'consistent_hashing'

module Franz

  # TailPool creates a consistently-hashed pool of Tails.
  class TailPool
    # Start a new TailPool thread in the background.
    #
    # @param opts [Hash] a complex Hash for configuration
    def initialize opts={}
      @size         = opts[:size]         || 5
      @watch_events = opts[:watch_events] || []
      @tail_events  = opts[:tail_events]  || []
      @size         = opts[:size]         || 5
      @logger       = opts[:logger]       || Logger.new(STDOUT)

      @tails  = []
      @ring   = ConsistentHashing::Ring.new
      @events = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }

      @size.times do |i|
        log.debug 'starting tail_pool-tail #%d' % i
        @ring << @events[i]
        @tails << Franz::Tail.new(opts.merge({
          watch_events: @events[i],
          tail_events: @tail_events
        }))
      end

      @stop = false

      @in_thread = Thread.new do
        log.debug 'starting tail_pool-watch'
        until @stop
          e = @watch_events.shift
          q = @ring.node_for e[:path]
          q.push e
        end
      end

      log.debug 'started tail_pool'
    end

    # Stop the TailPool thread. Effectively only once.
    #
    # @return [Hash] internal "cursors" state
    def stop
      return state if @stop
      @stop = true
      @tails.map(&:stop)
      log.debug 'stopped tail_pool'
      return state
    end

    # Return the internal "cursors" state
    def state
      @tails.map(&:state).reduce(&:merge)
    end

    private
    attr_reader :watch_events, :tail_events, :size

    def log ; @logger end
  end
end
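
From the outside a TailPool behaves like a single Tail: one input queue of watch events, one shared output queue of lines. Internally each path is routed by consistent hash to the same member Tail, so per-file ordering is preserved. The sketch below is illustrative only; it assumes the consistent_hashing dependency is installed and reuses the trace-capable logger shim from the Tail sketch above.

require 'logger'
require 'franz/tail'
require 'franz/tail_pool'

log = Logger.new(STDOUT)
def log.trace(*args) ; debug(*args) end

watch_events = Queue.new
tail_events  = Queue.new

pool = Franz::TailPool.new(
  size:         3,             # three member Tails
  watch_events: watch_events,  # single input, fanned out by path hash
  tail_events:  tail_events,   # single shared output
  logger:       log
)

# Use it exactly like a Tail: push watch events, shift line events.
# pool.stop returns the merged cursors of every member Tail.
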
data/lib/franz/watch.rb
ADDED
@@ -0,0 +1,180 @@
require 'logger'

module Franz

  # Watch works in tandem with Discover to maintain a list of known files and
  # their status. Events are generated when a file is created, destroyed, or
  # modified (including appended, truncated, and replaced).
  class Watch

    # Start a new Watch thread in the background.
    #
    # @param [Hash] opts options for the watch
    # @option opts [Queue] :discoveries ([]) "input" queue of discovered paths
    # @option opts [Queue] :deletions ([]) "output" queue of deleted paths
    # @option opts [Queue] :watch_events ([]) "output" queue of file events
    # @option opts [Fixnum] :watch_interval (10) seconds between watch rounds
    # @option opts [Hash<Path,State>] :stats ({}) internal "stats" state
    # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
    def initialize opts={}
      @discoveries    = opts[:discoveries]    || []
      @deletions      = opts[:deletions]      || []
      @watch_events   = opts[:watch_events]   || []
      @watch_interval = opts[:watch_interval] || 10
      @stats          = opts[:stats]          || Hash.new
      @logger         = opts[:logger]         || Logger.new(STDOUT)

      # Need to resend old events to make sure Tail catches up
      stats.each do |path, old_stat|
        watch_events.push name: :appended, path: path, size: old_stat[:size]
      end

      @stop = false

      @thread = Thread.new do
        log.debug 'starting watch-discover'
        until @stop
          until discoveries.empty?
            d = discoveries.pop
            @stats[d] = nil
          end
          watch.each do |deleted|
            @stats.delete deleted
            deletions.push deleted
          end
          sleep watch_interval
        end
      end

      log.debug 'started watch'
    end

    # Stop the Watch thread. Effectively only once.
    #
    # @return [Hash] internal "stats" state
    def stop
      return state if @stop
      @stop = true
      @thread.join
      log.debug 'stopped watch'
      return state
    end

    # Return the internal "stats" state
    def state
      return @stats.dup
    end

    private
    attr_reader :discoveries, :deletions, :watch_events, :watch_interval, :stats

    def log ; @logger end

    def enqueue name, path, size=nil
      log.trace 'enqueue: name=%s path=%s size=%s' % [
        name.inspect, path.inspect, size.inspect
      ]
      watch_events.push name: name, path: path, size: size
    end

    def watch
      deleted = []
      stats.keys.each do |path|
        old_stat = stats[path]
        stat = stat_for path
        stats[path] = stat

        if file_created? old_stat, stat
          enqueue :created, path
        elsif file_deleted? old_stat, stat
          enqueue :deleted, path
          deleted << path
        end

        if file_replaced? old_stat, stat
          enqueue :replaced, path, stat[:size]
        elsif file_appended? old_stat, stat
          enqueue :appended, path, stat[:size]
        elsif file_truncated? old_stat, stat
          enqueue :truncated, path, stat[:size]
        end
      end
      return deleted
    end

    # Perform a file stat and return a simplified version.
    #
    # @param path [String] file path to examine
    def stat_for path
      return begin
        stat = File::Stat.new(path)
        {
          inode: {
            ino: stat.ino,
            maj: stat.dev_major,
            min: stat.dev_minor
          },
          size: stat.size
        }
      rescue Errno::ENOENT
        nil
      end
    end

    # Grab only the inode from a stat (or nil if the stat is nil).
    #
    # @param stat [Stat] stat to inspect
    def inode_for stat
      return nil if stat.nil?
      return stat[:inode].to_a
    end

    # Detect whether the file was created.
    #
    # @param old_stat [Stat] stat before some change
    # @param new_stat [Stat] stat after some change
    def file_created? old_stat, new_stat
      return !new_stat.nil? && old_stat.nil?
    end

    # Detect whether the file was deleted.
    #
    # @param old_stat [Stat] stat before some change
    # @param new_stat [Stat] stat after some change
    def file_deleted? old_stat, new_stat
      return new_stat.nil? && !old_stat.nil?
    end

    # Detect whether the file was replaced (e.g. inode changed).
    #
    # @param old_stat [Stat] stat before some change
    # @param new_stat [Stat] stat after some change
    def file_replaced? old_stat, new_stat
      return false if new_stat.nil?
      return false if old_stat.nil?
      return inode_for(new_stat) != inode_for(old_stat)
    end

    # Detect whether the file was truncated (e.g. rotated).
    #
    # @param old_stat [Stat] stat before some change
    # @param new_stat [Stat] stat after some change
    def file_truncated? old_stat, new_stat
      return false if new_stat.nil?
      return false if old_stat.nil?
      return new_stat[:size] < old_stat[:size]
    end

    # Detect whether the file was appended.
    #
    # @param old_stat [Stat] stat before some change
    # @param new_stat [Stat] stat after some change
    def file_appended? old_stat, new_stat
      return false if new_stat.nil?
      return new_stat[:size] > 0 if old_stat.nil?
      return new_stat[:size] > old_stat[:size]
    end
  end
end
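
A sketch of a Watch fed by hand instead of by a Discover (illustrative only, not part of the packaged files). The path is hypothetical, and the trace-capable logger shim is the same workaround used in the Tail sketch above, since #enqueue logs at trace level.

require 'logger'
require 'franz/watch'

log = Logger.new(STDOUT)
def log.trace(*args) ; debug(*args) end

discoveries  = Queue.new   # "input": paths, normally pushed by Discover
deletions    = Queue.new   # "output": paths whose files disappeared
watch_events = Queue.new   # "output": :created/:appended/:truncated/:replaced/:deleted

watch = Franz::Watch.new(
  discoveries:    discoveries,
  deletions:      deletions,
  watch_events:   watch_events,
  watch_interval: 1,          # poll every second for this sketch
  logger:         log
)

path = '/tmp/franz_watch_example.log'   # hypothetical path
File.write path, "first line\n"
discoveries.push path

p watch_events.shift   # => { name: :created,  path: path, size: nil }
p watch_events.shift   # => { name: :appended, path: path, size: 11 }

File.delete path
p watch_events.shift   # => { name: :deleted,  path: path, size: nil }
p deletions.shift      # => path

stats = watch.stop     # simplified stat per known path, for checkpointing
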