franz 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +15 -0
- data/LICENSE +13 -0
- data/Rakefile +52 -0
- data/Readme.md +50 -0
- data/VERSION +1 -0
- data/bin/franz +81 -0
- data/franz.gemspec +25 -0
- data/lib/franz.rb +10 -0
- data/lib/franz/agg.rb +158 -0
- data/lib/franz/config.rb +24 -0
- data/lib/franz/discover.rb +108 -0
- data/lib/franz/input.rb +174 -0
- data/lib/franz/logger.rb +66 -0
- data/lib/franz/metadata.rb +33 -0
- data/lib/franz/output.rb +81 -0
- data/lib/franz/sash.rb +81 -0
- data/lib/franz/tail.rb +191 -0
- data/lib/franz/tail_pool.rb +68 -0
- data/lib/franz/watch.rb +180 -0
- data/test/test_franz_agg.rb +97 -0
- data/test/test_franz_discover.rb +88 -0
- data/test/test_franz_tail.rb +132 -0
- data/test/test_franz_watch.rb +144 -0
- metadata +155 -0
@@ -0,0 +1,108 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
|
4
|
+
# Discover performs half of file existence detection by expanding globs and
|
5
|
+
# keeping track of files known to Franz. Discover requires a deletions Queue to
|
6
|
+
# maintain this state, so it's fairly useless without a Watch.
|
7
|
+
class Franz::Discover
|
8
|
+
|
9
|
+
# Start a new Discover thread in the background.
|
10
|
+
#
|
11
|
+
# @param [Hash] opts options for the discovery
|
12
|
+
# @option opts [Array<Hash>] :configs ([]) file input configuration
|
13
|
+
# @option opts [Queue] :discoveries ([]) "output" queue of discovered paths
|
14
|
+
# @option opts [Queue] :deletions ([]) "input" queue of deleted paths
|
15
|
+
# @option opts [Integer] :discover_interval (30) seconds between discover rounds
|
16
|
+
# @option opts [Array<Path>] :known ([]) internal "known" state
|
17
|
+
# @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
|
18
|
+
def initialize opts={}
  # Every option falls back to its default when missing *or* nil.
  @configs = (opts[:configs] || []).map do |cfg|
    # Make sure each input config carries both glob lists
    cfg[:includes] ||= []
    cfg[:excludes] ||= []
    cfg
  end
  @discoveries       = opts[:discoveries] || []
  @deletions         = opts[:deletions] || []
  @discover_interval = opts[:discover_interval] || 30
  @ignore_before     = opts[:ignore_before] || 0
  @known             = opts[:known] || []
  @logger            = opts[:logger] || Logger.new(STDOUT)

  @stop = false

  # Background loop: forget deleted paths, announce new ones, then sleep.
  @thread = Thread.new do
    log.debug 'starting dicover-delete'
    until @stop
      # Drain pending deletions so those paths can be rediscovered later
      until deletions.empty?
        gone = deletions.pop
        @known.delete gone
        log.debug 'deleted: %s' % gone.inspect
      end

      # Publish each newly found path and remember it as known
      discover.each { |found|
        discoveries.push found
        @known.push found
        log.debug 'discovered: %s' % found.inspect
      }

      sleep discover_interval
    end
  end

  log.debug 'started discover'
end
|
54
|
+
|
55
|
+
# Stop the Discover thread. Effectively only once.
|
56
|
+
#
|
57
|
+
# @return [Array] internal "known" state
|
58
|
+
def stop
  # Only the first call flips the flag and waits for the thread; later
  # calls fall straight through to the state snapshot.
  unless @stop
    @stop = true
    @thread.join
    log.debug 'stopped discover'
  end
  state
end
|
65
|
+
|
66
|
+
# Return the internal "known" state
|
67
|
+
def state
  # Hand out a shallow copy so callers cannot mutate the live list
  @known.dup
end
|
70
|
+
|
71
|
+
private
|
72
|
+
attr_reader :configs, :discoveries, :deletions, :discover_interval, :known
|
73
|
+
|
74
|
+
# Shorthand accessor for the configured logger
def log
  @logger
end
|
75
|
+
|
76
|
+
# Run one discovery round: expand every include glob of every config and
# collect the paths that are regular files, not excluded, not already known
# and modified after @ignore_before.
#
# Each path is reported at most once per round, even when several include
# globs (or several configs) match it.
#
# @return [Array<String>] newly discovered paths, in discovery order
def discover
  seen = {} # insertion-ordered set of paths found this round
  configs.each do |config|
    config[:includes].each do |glob|
      expand(glob).each do |path|
        # Excludes are matched against the file name only, not the full path
        next if config[:excludes].any? { |exclude|
          File.fnmatch? exclude, File::basename(path)
        }
        next if seen.key? path
        next if known.include? path
        next unless File.file? path
        begin
          next if File.mtime(path).to_i <= @ignore_before
        rescue Errno::ENOENT
          # The file vanished between the checks above and the stat; skip it
          # rather than letting the exception end the discover thread.
          next
        end
        seen[path] = true
      end
    end
  end
  seen.keys
end
|
93
|
+
|
94
|
+
# Expand a glob into candidate paths. The directory part of the glob is
# expanded with Dir.glob; the file part is matched against each directory
# entry with File.fnmatch?.
#
# Brace alternatives (e.g. "*.{log,txt}") are honoured in the file part as
# well, matching what Dir.glob already does for the directory part.
#
# @param glob [String] glob such as "/var/log/app*/*.log"
# @return [Array<String>] matching paths (not checked to be regular files)
def expand glob
  dir_glob  = File.dirname(glob)
  file_glob = File.basename(glob)
  files = []
  Dir.glob(dir_glob).each do |dir|
    next unless File::directory?(dir)
    begin
      Dir.foreach(dir) do |fname|
        next if fname == '.' || fname == '..'
        next unless File.fnmatch?(file_glob, fname, File::FNM_EXTGLOB)
        files << File.join(dir, fname)
      end
    rescue Errno::ENOENT, Errno::ENOTDIR, Errno::EACCES => e
      # The directory disappeared or is unreadable; skip it for this round
      # instead of raising out of the discovery loop.
      log.warn 'skipping directory %s: %s' % [dir.inspect, e.message]
    end
  end
  files
end
|
108
|
+
end
|
data/lib/franz/input.rb
ADDED
@@ -0,0 +1,174 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
require 'deep_merge'
|
5
|
+
|
6
|
+
require_relative 'agg'
|
7
|
+
require_relative 'tail'
|
8
|
+
require_relative 'tail_pool'
|
9
|
+
require_relative 'watch'
|
10
|
+
require_relative 'discover'
|
11
|
+
|
12
|
+
module Franz
|
13
|
+
|
14
|
+
# File input for Franz. Really, the only input for Franz, so I hope you like it.
|
15
|
+
class Input
|
16
|
+
# Start a new input in the background. We'll generate a stream of events by
|
17
|
+
# watching the filesystem for changes (Franz::Discover and Franz::Watch),
|
18
|
+
# tailing files (Franz::Tail), and generating events (Franz::Agg)
|
19
|
+
#
|
20
|
+
# @param [Hash] opts options for the input
|
21
|
+
# @option opts [Hash] :input ({}) "input" configuration
|
22
|
+
# @option opts [Queue] :output ([]) "output" queue
|
23
|
+
# @option opts [Path] :checkpoint ('franz.*.checkpoint') checkpoint path pattern; the first '*' is replaced with a timestamp
|
24
|
+
# @option opts [Integer] :checkpoint_interval (30) seconds between checkpoints
|
25
|
+
# @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
|
26
|
+
def initialize opts={}
  # Built-in defaults; caller-supplied options are deep-merged on top
  defaults = {
    checkpoint: 'franz.*.checkpoint',
    checkpoint_interval: 30,
    logger: Logger.new(STDOUT),
    output: [],
    input: {
      ignore_before: 0,
      tail_pool_size: 10,
      discover_bound: 10_000,
      watch_bound: 1_000,
      tail_bound: 1_000,
      discover_interval: nil,
      watch_interval: nil,
      eviction_interval: nil,
      flush_interval: nil,
      configs: []
    }
  }
  opts = defaults.deep_merge!(opts)

  @logger = opts[:logger]

  @checkpoint_interval = opts[:checkpoint_interval]
  @checkpoint_glob     = opts[:checkpoint]
  # The first '*' of the pattern becomes a numeric placeholder for #checkpoint
  @checkpoint_path     = opts[:checkpoint].sub('*', '%d')

  # Restore state from the most recently modified, non-empty checkpoint.
  # The checkpoint is a Marshalled Hash keyed by path.
  # NOTE(review): Marshal.load must only ever see trusted checkpoint files.
  newest = Dir[@checkpoint_glob]
    .sort_by { |file| File.mtime file }
    .reject { |file| File.zero? file }
    .last
  state = nil
  if newest
    state = Marshal.load File.read(newest)
    log.debug 'Loaded %s' % newest.inspect
  end
  state ||= {}

  # Split the compact per-path state into the pieces each component wants:
  # cursors for Tail, sequence numbers for Agg, the remainder for Watch.
  known = state.keys
  stats, cursors, seqs = {}, {}, {}
  state.each do |path, entry|
    cursor = entry.delete :cursor
    seq    = entry.delete :seq
    cursors[path] = cursor unless cursor.nil?
    seqs[path]    = seq unless seq.nil?
    stats[path]   = entry
  end

  log.debug 'starting input...'

  input_opts = opts[:input]
  discoveries  = SizedQueue.new input_opts[:discover_bound]
  deletions    = SizedQueue.new input_opts[:discover_bound]
  watch_events = SizedQueue.new input_opts[:watch_bound]
  tail_events  = SizedQueue.new input_opts[:tail_bound]

  log.debug 'starting discover...'
  # NOTE(review): the ivar name is spelled "@disover"; kept as-is here.
  @disover = Franz::Discover.new(
    discoveries: discoveries,
    deletions: deletions,
    configs: input_opts[:configs],
    discover_interval: input_opts[:discover_interval],
    ignore_before: input_opts[:ignore_before],
    logger: opts[:logger],
    known: known
  )

  log.debug 'starting tail...'
  @tail = Franz::Tail.new(
    watch_events: watch_events,
    tail_events: tail_events,
    eviction_interval: input_opts[:eviction_interval],
    logger: opts[:logger],
    cursors: cursors
  )

  log.debug 'starting agg...'
  @agg = Franz::Agg.new(
    configs: input_opts[:configs],
    tail_events: tail_events,
    agg_events: opts[:output],
    flush_interval: input_opts[:flush_interval],
    logger: opts[:logger],
    seqs: seqs
  )

  log.debug 'starting watch...'
  @watch = Franz::Watch.new(
    discoveries: discoveries,
    deletions: deletions,
    watch_events: watch_events,
    watch_interval: input_opts[:watch_interval],
    logger: opts[:logger],
    stats: stats
  )

  # Periodic checkpoint writer; runs until #stop flips the flag
  @stop = false
  @t = Thread.new do
    log.debug 'starting checkpoint'
    until @stop
      checkpoint
      sleep @checkpoint_interval
    end
  end

  log.debug 'started input'
end
|
131
|
+
|
132
|
+
# Stop everything. Has the effect of draining all the Queues and waiting on
|
133
|
+
# auxiliary threads (e.g. eviction) to complete full intervals, so it may
|
134
|
+
# ordinarily take tens of seconds, depending on your configuration.
|
135
|
+
#
|
136
|
+
# @return [Hash] compact internal state
|
137
|
+
# Stop the checkpoint thread and every pipeline stage, upstream first
# (discover, watch, tail, agg). Only the first call does any work; later
# calls just return the current state.
#
# @return [Hash] compact internal state
def stop
  return state if @stop
  @stop = true
  @t.join
  # The Discover thread is started in #initialize and must be stopped too,
  # otherwise it keeps running after the input is "stopped". It goes first,
  # while Watch is still consuming the discoveries queue.
  # (The ivar really is spelled "@disover" in #initialize.)
  @disover.stop
  @watch.stop
  @tail.stop
  @agg.stop
  log.debug 'stopped input'
  state
end
|
147
|
+
|
148
|
+
# Return a compact representation of internal state
|
149
|
+
def state
  # Collect the three partial states, then fold cursors and sequence
  # numbers into the per-path stats Hash.
  snapshot = @watch.state
  offsets  = @tail.state
  counters = @agg.state
  snapshot.keys.each do |path|
    entry = (snapshot[path] ||= {})
    entry[:cursor] = offsets.fetch(path, nil)
    entry[:seq]    = counters.fetch(path, nil)
  end
  snapshot
end
|
160
|
+
|
161
|
+
# Write a checkpoint file given the current state
|
162
|
+
# Write the current state to a fresh, timestamped checkpoint file and prune
# older checkpoints so that only the new file and the most recent previous
# one remain.
def checkpoint
  old_checkpoints = Dir[@checkpoint_glob].sort_by { |p| File.mtime p }
  path = @checkpoint_path % Time.now.to_i
  # Marshal output is binary data, so write it in binary mode
  File.open(path, 'wb') { |f| f.write Marshal.dump(state) }
  # If the target already existed it is not "old" anymore; never prune it
  old_checkpoints.delete path
  old_checkpoints.pop # Spare the newest previous checkpoint
  old_checkpoints.each { |c| FileUtils.rm c }
  log.info 'Wrote %s' % path.inspect
end
|
170
|
+
|
171
|
+
private
|
172
|
+
# Shorthand accessor for the configured logger
def log
  @logger
end
|
173
|
+
end
|
174
|
+
end
|
data/lib/franz/logger.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
require 'colorize'
|
5
|
+
|
6
|
+
module Franz
|
7
|
+
|
8
|
+
# Extending the Logger with TRACE capabilities
|
9
|
+
class ::Logger
  # Register a TRACE severity numbered one past the stdlib labels, so it is
  # never filtered out by the logger's level (N.B. TRACE is above other
  # levels); emission is gated by @trace instead.
  if SEV_LABEL.frozen?
    # The label table cannot be mutated when it is frozen, so supply the
    # label through format_severity instead.
    TRACE = SEV_LABEL.size unless const_defined?(:TRACE, false)
    trace_label = Module.new do
      def format_severity(severity)
        severity == ::Logger::TRACE ? 'TRACE' : super
      end
      private :format_severity
    end
    prepend trace_label
  else
    SEV_LABEL << 'TRACE' unless SEV_LABEL.include?('TRACE')
    TRACE = SEV_LABEL.index('TRACE') unless const_defined?(:TRACE, false)
  end

  # Send a TRACE-level log line. Does nothing unless @trace is set.
  #
  # @param progname [String, nil] message (or program name when a block
  #   supplies the message), as for the stdlib severity methods
  def trace progname=nil, &block
    add TRACE, nil, progname, &block if @trace
  end
end
|
18
|
+
|
19
|
+
# A powerful, colorful logger for Franz.
|
20
|
+
class Logger < Logger
  # Maps each log level to a unique combination of fore- and background colors
  SEVERITY_COLORS = {
    'DEBUG' => [ :blue,    :default ],
    'INFO'  => [ :green,   :default ],
    'WARN'  => [ :yellow,  :default ],
    'ERROR' => [ :red,     :default ],
    'FATAL' => [ :red,     :black   ],
    'TRACE' => [ :magenta, :default ]
  }

  # Create a new, colorful logger.
  #
  # @param debug [Boolean] enable DEBUG level logs (default level is INFO)
  # @param trace [Boolean] enable TRACE lines (sets @trace)
  # @param out [File] output destination for logs, $stdout when nil
  def initialize debug=false, trace=false, out=nil
    out ||= $stdout
    super out
    colorize
    @trace = true if trace
    self.level = ::Logger::INFO
    self.level = ::Logger::DEBUG if debug
  end

  private

  # Install a formatter that colors each line by severity. At INFO level
  # only the message is printed; at any other level the line also carries
  # the severity, an ISO 8601 timestamp and the basename of a caller frame.
  def colorize
    self.formatter = proc do |severity, datetime, _, message|
      # Severities without an entry (e.g. "ANY" from #unknown) fall back to
      # the terminal defaults instead of raising on a nil lookup.
      color, background = SEVERITY_COLORS.fetch(severity.to_s) { [ :default, :default ] }
      if level == ::Logger::INFO
        message.to_s.colorize(color: color, background: background) + "\n"
      else
        "%s [%s] %s -- %s\n".colorize(color: color, background: background) % [
          severity,
          datetime.iso8601(6),
          # NOTE(review): presumably frame 4 is the call site of the log
          # method; this depends on stdlib Logger internals - confirm.
          File::basename(caller[4]),
          message
        ]
      end
    end
  end
end
|
66
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Franz ships line-oriented log files to RabbitMQ. Think barebones logstash in
|
2
|
+
# pure Ruby with more modest compute and memory requirements.
|
3
|
+
module Franz
|
4
|
+
|
5
|
+
# We use a VERSION file to tie into our build pipeline
|
6
|
+
VERSION = File.read(File.join(File.dirname(__FILE__), '..', '..', 'VERSION')).strip
|
7
|
+
|
8
|
+
# We don't really do all that much, be humble
|
9
|
+
SUMMARY = 'Aggregate log file events and send them elsewhere'
|
10
|
+
|
11
|
+
# Your benevolent dictator for life
|
12
|
+
AUTHOR = 'Sean Clemmer'
|
13
|
+
|
14
|
+
# Turn here to strangle your dictator
|
15
|
+
EMAIL = 'sclemmer@bluejeans.com'
|
16
|
+
|
17
|
+
# Every project deserves its own ASCII art
|
18
|
+
ART = <<-'EOART' % VERSION
|
19
|
+
|
20
|
+
.--.,
|
21
|
+
,--.' \ __ ,-. ,---, ,----,
|
22
|
+
| | /\/,' ,'/ /| ,-+-. / | .' .`|
|
23
|
+
: : : ' | |' | ,--.--. ,--.'|' | .' .' .'
|
24
|
+
: | |-,| | ,'/ \ | | ,"' |,---, ' ./
|
25
|
+
| : :/|' : / .--. .-. | | | / | |; | .' /
|
26
|
+
| | .'| | ' \__\/: . . | | | | |`---' / ;--,
|
27
|
+
' : ' ; : | ," .--.; | | | | |/ / / / .`|
|
28
|
+
| | | | , ; / / ,. | | | |--' ./__; .'
|
29
|
+
| : \ ---' ; : .' \| |/ ; | .'
|
30
|
+
| |,' | , .-./'---' `---'
|
31
|
+
`--' `--`---' v%s
|
32
|
+
EOART
|
33
|
+
end
|
data/lib/franz/output.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
require 'bunny'
|
4
|
+
require 'deep_merge'
|
5
|
+
|
6
|
+
module Franz
|
7
|
+
|
8
|
+
# RabbitMQ output for Franz. You must declare an x-consistent-hash type
|
9
|
+
# exchange, as we generate random Integers for routing keys.
|
10
|
+
class Output
|
11
|
+
|
12
|
+
# Start a new output in the background. We'll consume from the input queue
|
13
|
+
# and ship events to the configured RabbitMQ cluster.
|
14
|
+
#
|
15
|
+
# @param [Hash] opts options for the output
|
16
|
+
# @option opts [Queue] :input ([]) "input" queue
|
17
|
+
# @option opts [Hash] :output ({}) "output" configuration
|
18
|
+
def initialize opts={}
  # Built-in defaults; caller-supplied options are deep-merged on top.
  # Besides :input and :output, the code below also reads :logger, :tags
  # and :foreground.
  defaults = {
    logger: Logger.new(STDOUT),
    tags: [],
    input: [],
    output: {
      exchange: {
        name: 'test',
        durable: true
      },
      connection: {
        host: 'localhost',
        port: 5672
      }
    }
  }
  opts = defaults.deep_merge!(opts)

  @logger = opts[:logger]

  # Connect and declare the exchange. :name is removed from the exchange
  # options so the remainder can be passed through as declaration options.
  rabbit = Bunny.new opts[:output][:connection]
  rabbit.start
  channel = rabbit.create_channel

  exchange_opts = opts[:output][:exchange]
  exchange_name = exchange_opts.delete(:name)
  exchange = channel.exchange(
    exchange_name, exchange_opts.merge(type: 'x-consistent-hash')
  )

  @stop = false
  @foreground = opts[:foreground]

  # Publisher loop: take events off the input, tag them, ship them as JSON
  @thread = Thread.new do
    keygen = Random.new
    queue  = opts[:input]
    tags   = opts[:tags]
    until @stop
      event = queue.shift
      event[:tags] = tags unless tags.empty?
      log.trace 'publishing event=%s' % event.inspect
      exchange.publish(
        JSON::generate(event),
        routing_key: keygen.rand(1_000_000), # random key for the hash exchange
        persistent: false
      )
    end
  end

  # In foreground mode the constructor blocks on the publisher thread
  @thread.join if @foreground
end
|
63
|
+
|
64
|
+
# Join the Output thread. Effectively only once.
|
65
|
+
def join
  # Once marked as foreground (by an earlier #join or #stop, or via the
  # :foreground option) this is a no-op that returns nil.
  unless @foreground
    @foreground = true
    @thread.join
  end
end
|
70
|
+
|
71
|
+
# Stop the Output thread. Effectively only once.
|
72
|
+
def stop
  # Once marked as foreground (by an earlier #join or #stop, or via the
  # :foreground option) this is a no-op that returns nil.
  unless @foreground
    @foreground = true
    @thread.kill
  end
end
|
77
|
+
|
78
|
+
private
|
79
|
+
# Shorthand accessor for the configured logger
def log
  @logger
end
|
80
|
+
end
|
81
|
+
end
|