franz 1.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +15 -0
- data/LICENSE +13 -0
- data/Rakefile +52 -0
- data/Readme.md +50 -0
- data/VERSION +1 -0
- data/bin/franz +81 -0
- data/franz.gemspec +25 -0
- data/lib/franz.rb +10 -0
- data/lib/franz/agg.rb +158 -0
- data/lib/franz/config.rb +24 -0
- data/lib/franz/discover.rb +108 -0
- data/lib/franz/input.rb +174 -0
- data/lib/franz/logger.rb +66 -0
- data/lib/franz/metadata.rb +33 -0
- data/lib/franz/output.rb +81 -0
- data/lib/franz/sash.rb +81 -0
- data/lib/franz/tail.rb +191 -0
- data/lib/franz/tail_pool.rb +68 -0
- data/lib/franz/watch.rb +180 -0
- data/test/test_franz_agg.rb +97 -0
- data/test/test_franz_discover.rb +88 -0
- data/test/test_franz_tail.rb +132 -0
- data/test/test_franz_watch.rb +144 -0
- metadata +155 -0
data/lib/franz/sash.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
3
|
+
module Franz
|
4
|
+
|
5
|
+
# Sash - A threadsafe hash/array hybrid with access times
#
# @example
#   s = Sash.new           # => #<Sash...>
#   s.keys                 # => []
#   s.insert :key, :value  # => :value
#   s.get :key             # => [:value]
#   s.insert :key, :crazy  # => :crazy
#   s.mtime :key           # => 2014-02-18 21:24:30 -0800
#   s.flush :key           # => [:value, :crazy]
#
# Think of it like a Hash where the keys map to "value buffers". Every
# operation runs under a single Mutex so readers never observe a buffer and
# its modification time out of step with each other.
class Sash

  # Create a new, empty Sash.
  def initialize
    @mutex = Mutex.new
    # Plain Hash: asking for the mtime of an unknown key answers nil without
    # leaving a placeholder entry behind.
    @mtime = Hash.new
    @hash  = Hash.new { |default, key| default[key] = [] }
  end

  # Grab a list of known keys.
  #
  # @return [Array<Object>]
  def keys
    @mutex.synchronize { @hash.keys }
  end

  # Insert a value into a key's value buffer.
  #
  # @param key [Object]
  # @param value [Object]
  #
  # @return [Object] the value
  def insert key, value
    @mutex.synchronize do
      @hash[key] << value
      @mtime[key] = Time.now
    end
    return value
  end

  # Return a key's value buffer. An unknown key yields a new, empty buffer
  # which is registered under that key (so it will show up in #keys).
  #
  # @param [Object] key
  #
  # @return [Array<Object>] the live buffer, not a copy
  def get key
    @mutex.synchronize { @hash[key] }
  end

  # Remove and return a key's value buffer, forgetting its modification time
  # as well so removed keys do not accumulate stale timestamps.
  #
  # @param [Object] key
  #
  # @return [Array<Object>, nil] the buffer, or nil if the key was unknown
  def remove key
    @mutex.synchronize do
      @mtime.delete key
      @hash.delete key
    end
  end

  # Return the last time the key's value buffer was modified.
  #
  # @param [Object] key
  #
  # @return [Time, nil] nil if the key has never been inserted or flushed
  def mtime key
    @mutex.synchronize { @mtime[key] }
  end

  # Flush and return a key's value buffer. The key stays known, with a fresh
  # empty buffer and an updated modification time.
  #
  # @param [Object] key
  #
  # @return [Array<Object>]
  def flush key
    @mutex.synchronize do
      value = @hash[key]
      @hash[key] = []
      @mtime[key] = Time.now
      value
    end
  end
end
|
81
|
+
end
|
data/lib/franz/tail.rb
ADDED
@@ -0,0 +1,191 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
require 'buftok'
|
5
|
+
|
6
|
+
module Franz
|
7
|
+
|
8
|
+
# Tail receives low-level file events from a Watch and handles the actual
# reading of files, providing a stream of lines.
#
# Three background threads are started by the constructor:
# * tail-watch sorts incoming events into one bounded queue per path
# * tail handles one event per path per round, so that a busy file cannot
#   starve the others
# * tail-evict periodically closes file handles that have gone quiet
class Tail
  attr_reader :cursors

  # Start a new Tail thread in the background.
  #
  # @param opts [Hash] a complex Hash for tail configuration
  # @option opts [Queue] :watch_events ([]) "input" queue of file events
  # @option opts [Queue] :tail_events ([]) "output" queue of captured lines
  # @option opts [Numeric] :eviction_interval (60) seconds between evictions,
  #   and also how long a file must be idle before its handle is closed
  # @option opts [Integer] :block_size (32_768) bytes requested per sysread
  # @option opts [Integer] :spread_size (98_304) bytes read from one file
  #   before the rest of the work is pushed onto that file's backlog
  # @option opts [Hash<String,Integer>] :cursors ({}) internal "cursors" state
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use; it is
  #   sent #trace as well as #debug, which the stdlib Logger does not define
  def initialize opts={}
    @watch_events      = opts[:watch_events]      || []
    @tail_events       = opts[:tail_events]       || []
    @eviction_interval = opts[:eviction_interval] || 60
    @block_size        = opts[:block_size]        || 32_768 # 32 KiB
    @spread_size       = opts[:spread_size]       || 98_304 # 96 KiB
    @cursors           = opts[:cursors]           || Hash.new
    @logger            = opts[:logger]            || Logger.new(STDOUT)

    @buffer  = Hash.new { |h, k| h[k] = BufferedTokenizer.new }
    @file    = Hash.new
    @changed = Hash.new
    @reading = Hash.new
    @stop    = false

    # Created before any thread starts: evict consults both of them.
    @backlog  = Hash.new { |h, k| h[k] = Array.new }
    @incoming = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }

    @evict_thread = Thread.new do
      log.debug 'starting tail-evict'
      until @stop
        evict
        sleep eviction_interval
      end
      # One last pass after the stop flag was seen
      sleep eviction_interval
      evict
    end

    @watch_thread = Thread.new do
      log.debug 'starting tail-watch'
      until @stop
        e = watch_events.shift
        @incoming[e[:path]].push e
      end
    end

    @tail_thread = Thread.new do
      until @stop
        had_event = false

        # Shuffled so no path is systematically served first
        paths = (@backlog.keys + @incoming.keys).uniq.shuffle

        paths.each do |path|
          # Leftover work from a previous partial read takes priority
          event = @backlog[path].shift
          if event.nil?
            begin
              event = @incoming[path].shift(true) # non-blocking
            rescue ThreadError
              next # queue is empty
            end
          end
          had_event = true
          handle event
        end

        sleep 0.05 unless had_event
      end
    end

    log.debug 'started tail'
  end

  # Stop the Tail thread. Effectively only once.
  #
  # The watch thread is killed outright (it may be blocked waiting for
  # input); the evict and tail threads are joined. The evict thread sleeps
  # for the eviction interval inside its loop and once more after it, so this
  # call can block for roughly two intervals.
  #
  # @return [Hash] internal "cursors" state
  def stop
    return state if @stop
    @stop = true
    @watch_thread.kill
    @evict_thread.join
    @tail_thread.join
    log.debug 'stopped tail'
    return state
  end

  # Return the internal "cursors" state
  #
  # @return [Hash] a shallow copy mapping path to read position
  def state
    return @cursors.dup
  end

private
  # NB: :cursors is intentionally absent here. Declaring it again below
  # `private` would turn the public reader at the top of the class private.
  attr_reader :watch_events, :tail_events, :eviction_interval, :block_size, :file, :buffer, :changed, :reading

  def log ; @logger end

  # Make sure there is an open handle for the path, positioned at its cursor
  # (or at the start of the file when no cursor is known).
  #
  # @param path [String] file to open
  #
  # @return [Boolean] false if the file does not exist, true otherwise
  def open path
    return true unless file[path].nil?
    pos = @cursors.include?(path) ? @cursors[path] : 0
    begin
      file[path] = File.open(path)
      file[path].sysseek pos, IO::SEEK_SET
      @cursors[path] = pos
      @changed[path] = Time.now.to_i
    rescue Errno::ENOENT
      return false
    end
    log.debug 'opened: path=%s' % path.inspect
    return true
  end

  # Read the file from its cursor up to the given size, emitting one tail
  # event per complete line. Stops early once more than the spread size has
  # been consumed, leaving an :appended event on the backlog so the remainder
  # is picked up in a later round.
  #
  # @param path [String] file to read
  # @param size [Integer] file size reported by the triggering event
  def read path, size
    @reading[path] = true # keep evict away while we work

    bytes_read = 0
    loop do
      break unless open(path) # no-op when the handle is already open
      break if file[path].pos >= size

      if bytes_read >= @spread_size
        @backlog[path].push name: :appended, path: path, size: size
        break
      end

      begin
        data = file[path].sysread @block_size
      rescue EOFError, Errno::ENOENT
        # The file is shorter than the event claimed (e.g. it shrank after it
        # was stat'ed). Nothing more can be read, and the position will never
        # reach `size`, so we must leave the loop rather than retry forever.
        break
      end

      buffer[path].extract(data).each do |line|
        log.trace 'captured: path=%s line=%s' % [ path, line ]
        tail_events.push path: path, line: line
      end

      last_pos = @cursors[path]
      @cursors[path] = file[path].pos
      bytes_read += @cursors[path] - last_pos
    end

    log.trace 'read: path=%s size=%s' % [ path.inspect, size.inspect ]
    @changed[path] = Time.now.to_i
    @reading.delete path
  end

  # Close the handle for the path (if any) and forget its cursor, so the
  # next read starts over from the beginning of the file.
  #
  # @param path [String] file to close
  def close path
    @reading[path] = true # prevent evict from interrupting
    file.delete(path).close if file.include? path
    @cursors.delete(path)
    @changed.delete(path)
    @reading.delete(path)
    log.debug 'closed: path=%s' % path.inspect
  end

  # Close handles that are idle: not being read, unchanged for longer than
  # the eviction interval, and with no queued work. Cursors are kept, so an
  # evicted file is reopened at the same position later.
  def evict
    file.keys.each do |path|
      next if @reading[path]
      next unless file.include? path
      changed_at = @changed[path]
      # May have been cleared by a concurrent close on the tail thread
      next if changed_at.nil?
      next unless changed_at < Time.now.to_i - eviction_interval
      next unless @incoming[path].empty?
      next unless @backlog[path].empty?
      file.delete(path).close
      log.debug 'evicted: path=%s' % path.inspect
    end
  end

  # Dispatch a single file event.
  #
  # @param event [Hash] event with :name, :path and (for reads) :size
  #
  # @raise [RuntimeError] if the event name is not recognized
  def handle event
    log.trace 'handle: event=%s' % event.inspect
    case event[:name]
    when :created
      # Nothing to do: there is no data to read until an :appended arrives
    when :replaced, :truncated
      # Old positions are meaningless; start over from the top
      close event[:path]
      read event[:path], event[:size]
    when :appended
      read event[:path], event[:size]
    when :deleted
      close event[:path]
    else
      raise 'invalid event'
    end
  end
end
|
191
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
require 'consistent_hashing'
|
5
|
+
|
6
|
+
module Franz
|
7
|
+
|
8
|
+
# TailPool creates a consistently-hashed pool of Tails.
#
# Every Tail gets its own bounded event queue; a routing thread takes events
# from the shared input queue and hands each one to the queue the hash ring
# selects for the event's path.
class TailPool
  # Start a new TailPool thread in the background.
  #
  # @param opts [Hash] a complex Hash for configuration; it is also passed
  #   on to every Tail, with :watch_events and :tail_events overridden
  # @option opts [Integer] :size (5) number of Tails in the pool
  # @option opts [Queue] :watch_events ([]) "input" queue of file events
  # @option opts [Queue] :tail_events ([]) "output" queue shared by all Tails
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
  def initialize opts={}
    @size         = opts[:size]         || 5
    @watch_events = opts[:watch_events] || []
    @tail_events  = opts[:tail_events]  || []
    @logger       = opts[:logger]       || Logger.new(STDOUT)

    @tails  = []
    @ring   = ConsistentHashing::Ring.new
    @events = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }

    @size.times do |i|
      log.debug 'starting tail_pool-tail #%d' % i
      @ring << @events[i]
      @tails << Franz::Tail.new(opts.merge({
        watch_events: @events[i],
        tail_events: @tail_events
      }))
    end

    @stop = false

    @in_thread = Thread.new do
      log.debug 'starting tail_pool-watch'
      until @stop
        e = @watch_events.shift
        q = @ring.node_for e[:path]
        q.push e
      end
    end

    log.debug 'started tail_pool'
  end

  # Stop the TailPool thread. Effectively only once.
  #
  # The routing thread is killed first (it may be blocked waiting for input
  # and would otherwise never finish), then every Tail is stopped.
  #
  # @return [Hash] internal "cursors" state
  def stop
    return state if @stop
    @stop = true
    @in_thread.kill
    @in_thread.join
    @tails.each(&:stop)
    log.debug 'stopped tail_pool'
    return state
  end

  # Return the internal "cursors" state
  #
  # @return [Hash] the states of all Tails merged into one Hash; empty when
  #   the pool has no Tails
  def state
    @tails.map(&:state).reduce({}, :merge)
  end

private
  attr_reader :watch_events, :tail_events, :size

  def log ; @logger end
end
|
68
|
+
end
|
data/lib/franz/watch.rb
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module Franz
|
4
|
+
|
5
|
+
# Watch works in tandem with Discover to maintain a list of known files and
# their status. Events are generated when a file is created, destroyed, or
# modified (including appended, truncated, and replaced).
class Watch

  # Start a new Watch thread in the background.
  #
  # @param [Hash] opts options for the watch
  # @option opts [Queue] :discoveries ([]) "input" queue of discovered paths
  # @option opts [Queue] :deletions ([]) "output" queue of deleted paths
  # @option opts [Queue] :watch_events ([]) "output" queue of file events
  # @option opts [Numeric] :watch_interval (10) seconds between watch rounds
  # @option opts [Hash<String,Hash>] :stats ({}) internal "stats" state
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use; it is
  #   sent #trace as well as #debug, which the stdlib Logger does not define
  def initialize opts={}
    @discoveries    = opts[:discoveries]    || []
    @deletions      = opts[:deletions]      || []
    @watch_events   = opts[:watch_events]   || []
    @watch_interval = opts[:watch_interval] || 10
    @stats          = opts[:stats]          || Hash.new
    @logger         = opts[:logger]         || Logger.new(STDOUT)

    # Need to resend old events to make sure Tail catches up
    stats.each do |path, old_stat|
      # A path that was discovered but not yet stat'ed is stored as nil;
      # there is no size to replay for it.
      next if old_stat.nil?
      watch_events.push name: :appended, path: path, size: old_stat[:size]
    end

    @stop = false

    @thread = Thread.new do
      log.debug 'starting watch-discover'
      until @stop
        # Register new discoveries with an unknown (nil) stat
        until discoveries.empty?
          d = discoveries.pop
          @stats[d] = nil
        end
        # Forget deleted paths and report them
        watch.each do |deleted|
          @stats.delete deleted
          deletions.push deleted
        end
        sleep watch_interval
      end
    end

    log.debug 'started watch'
  end

  # Stop the Watch thread. Effectively only once.
  #
  # Waits for the current watch round (and its sleep) to finish.
  #
  # @return [Hash] internal "stats" state
  def stop
    return state if @stop
    @stop = true
    @thread.join
    log.debug 'stopped watch'
    return state
  end

  # Return the internal "stats" state
  #
  # @return [Hash] a shallow copy mapping path to its last simplified stat
  def state
    return @stats.dup
  end

private
  attr_reader :discoveries, :deletions, :watch_events, :watch_interval, :stats

  def log ; @logger end

  # Log and emit a single file event.
  #
  # @param name [Symbol] event name
  # @param path [String] file the event is about
  # @param size [Integer, nil] file size, where relevant to the event
  def enqueue name, path, size=nil
    log.trace 'enqueue: name=%s path=%s size=%s' % [
      name.inspect, path.inspect, size.inspect
    ]
    watch_events.push name: name, path: path, size: size
  end

  # Stat every known path once, record the new stat and emit events for
  # whatever changed since the previous round.
  #
  # @return [Array<String>] paths found to be deleted in this round
  def watch
    deleted = []
    stats.keys.each do |path|
      old_stat = stats[path]
      stat = stat_for path
      stats[path] = stat

      if file_created? old_stat, stat
        enqueue :created, path
      elsif file_deleted? old_stat, stat
        enqueue :deleted, path
        deleted << path
      end

      # Checked independently of the above: a newly created, non-empty file
      # also counts as appended.
      if file_replaced? old_stat, stat
        enqueue :replaced, path, stat[:size]
      elsif file_appended? old_stat, stat
        enqueue :appended, path, stat[:size]
      elsif file_truncated? old_stat, stat
        enqueue :truncated, path, stat[:size]
      end
    end
    return deleted
  end

  # Perform a file stat and return a simplified version.
  #
  # @param path [String] file path to examine
  #
  # @return [Hash, nil] Hash with :inode (itself a Hash of :ino, :maj, :min)
  #   and :size, or nil if the file does not exist
  def stat_for path
    stat = File::Stat.new(path)
    return {
      inode: {
        ino: stat.ino,
        maj: stat.dev_major,
        min: stat.dev_minor
      },
      size: stat.size
    }
  rescue Errno::ENOENT
    return nil
  end

  # Grab only the inode from a stat (or nil if the stat is nil).
  #
  # @param stat [Hash, nil] simplified stat to inspect
  #
  # @return [Array, nil] the inode Hash as an Array of pairs
  def inode_for stat
    return nil if stat.nil?
    return stat[:inode].to_a
  end

  # Detect whether the file was created.
  #
  # @param old_stat [Hash, nil] simplified stat before some change
  # @param new_stat [Hash, nil] simplified stat after some change
  def file_created? old_stat, new_stat
    return !new_stat.nil? && old_stat.nil?
  end

  # Detect whether the file was deleted.
  #
  # @param old_stat [Hash, nil] simplified stat before some change
  # @param new_stat [Hash, nil] simplified stat after some change
  def file_deleted? old_stat, new_stat
    return new_stat.nil? && !old_stat.nil?
  end

  # Detect whether the file was replaced (e.g. inode changed).
  #
  # @param old_stat [Hash, nil] simplified stat before some change
  # @param new_stat [Hash, nil] simplified stat after some change
  def file_replaced? old_stat, new_stat
    return false if new_stat.nil?
    return false if old_stat.nil?
    return inode_for(new_stat) != inode_for(old_stat)
  end

  # Detect whether the file was truncated (e.g. rotated).
  #
  # @param old_stat [Hash, nil] simplified stat before some change
  # @param new_stat [Hash, nil] simplified stat after some change
  def file_truncated? old_stat, new_stat
    return false if new_stat.nil?
    return false if old_stat.nil?
    return new_stat[:size] < old_stat[:size]
  end

  # Detect whether the file was appended. A file seen for the first time
  # counts as appended when it is not empty.
  #
  # @param old_stat [Hash, nil] simplified stat before some change
  # @param new_stat [Hash, nil] simplified stat after some change
  def file_appended? old_stat, new_stat
    return false if new_stat.nil?
    return new_stat[:size] > 0 if old_stat.nil?
    return new_stat[:size] > old_stat[:size]
  end
end
|
180
|
+
end
|