franz 1.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +15 -0
- data/LICENSE +13 -0
- data/Rakefile +52 -0
- data/Readme.md +50 -0
- data/VERSION +1 -0
- data/bin/franz +81 -0
- data/franz.gemspec +25 -0
- data/lib/franz.rb +10 -0
- data/lib/franz/agg.rb +158 -0
- data/lib/franz/config.rb +24 -0
- data/lib/franz/discover.rb +108 -0
- data/lib/franz/input.rb +174 -0
- data/lib/franz/logger.rb +66 -0
- data/lib/franz/metadata.rb +33 -0
- data/lib/franz/output.rb +81 -0
- data/lib/franz/sash.rb +81 -0
- data/lib/franz/tail.rb +191 -0
- data/lib/franz/tail_pool.rb +68 -0
- data/lib/franz/watch.rb +180 -0
- data/test/test_franz_agg.rb +97 -0
- data/test/test_franz_discover.rb +88 -0
- data/test/test_franz_tail.rb +132 -0
- data/test/test_franz_watch.rb +144 -0
- metadata +155 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 56734aa5910e2ab4f20bca8e21ae5cc76bbcfb9d
|
4
|
+
data.tar.gz: 7bb3ea267a09bdeb68b1e0320daacd04c760890a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 58ceaffc0dc31e5fc7ea08c77d38c32eb7ace575f000182bb629d0c69888b4e7539e50e83d8a08cbb84e6d15b68b1cd5a03e55ed75f89ed0f35a225cdae79444
|
7
|
+
data.tar.gz: 80c24a9410ea0e4404541f4eecbe2a87becec61fe193758f346a1699d7db145eeba80fde2e79a598df7427421d3ed995a7ad54b77c9a9628bef7146d8a3358a1
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2014 Sean Clemmer and Blue Jeans Network
|
2
|
+
|
3
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
4
|
+
purpose with or without fee is hereby granted, provided that the above
|
5
|
+
copyright notice and this permission notice appear in all copies.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
8
|
+
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
9
|
+
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
10
|
+
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
11
|
+
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
12
|
+
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
13
|
+
PERFORMANCE OF THIS SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
require 'rake'
|
4
|
+
|
5
|
+
|
6
|
+
require 'rake/testtask'
# `rake test` runs every test/test*.rb with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.concat %w[ lib test ]
  t.test_files = FileList['test/test*.rb']
  t.verbose = true
end

# A bare `rake` runs the test suite.
task default: :test


require 'yard'
# API documentation: the Readme plus everything under lib/, with VERSION
# listed after the "-" separator.
YARD::Rake::YardocTask.new do |yardoc|
  yardoc.files = %w[ --readme Readme.md lib/**/*.rb - VERSION ]
end


require 'rubygems/tasks'
# Options handed to rubygems-tasks; the console task is pointed at pry.
gem_task_options = { push: false, sign: {} }
Gem::Tasks.new(gem_task_options) do |tasks|
  tasks.console.command = 'pry'
end
Gem::Tasks::Sign::Checksum.new sha2: true


require 'rake/version_task'
Rake::VersionTask.new
|
34
|
+
|
35
|
+
|
36
|
+
desc "Upload build artifacts to WOPR"
# Builds the gem, then pushes pkg/franz-<VERSION>.gem to the artifact host
# over FTP and makes the uploaded file group-writable.
task :upload => :build do
  pkg_name = 'franz-%s.gem' % File.read('VERSION').strip
  pkg_path = File.join 'pkg', pkg_name

  require 'net/ftp'
  ftp = Net::FTP.new
  begin
    # `ftp.passive` alone only reads the attribute; it has to be assigned to
    # actually turn passive mode on.
    ftp.passive = true
    ftp.connect '10.4.4.15', 8080
    ftp.login
    ftp.put pkg_path
    ftp.sendcmd("SITE CHMOD 0664 #{pkg_name}")
  ensure
    # Connecting and logging in happen inside the begin block so the control
    # connection is closed even when one of them fails.
    ftp.close
  end
end
|
data/Readme.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# Franz
|
2
|
+
|
3
|
+
Franz ships line-oriented log files to [RabbitMQ](http://www.rabbitmq.com/).
|
4
|
+
Think barebones [logstash](http://logstash.net/) in pure Ruby with more modest
|
5
|
+
compute and memory requirements.
|
6
|
+
|
7
|
+
This is really to avoid the JVM tax, but you probably still want logstash agents
|
8
|
+
doing the bulk of the log processing. Using this setup, RabbitMQ and logstash
|
9
|
+
may be scaled and restarted independently, so new configurations may be applied
|
10
|
+
without interrupting those precious log hosts.
|
11
|
+
|
12
|
+
Even so, Franz was designed to be interrupted. Before exiting, Franz drains his
|
13
|
+
event queues and writes any "leftover" state to disk. When he's called next, he picks
|
14
|
+
up those leftovers and continues as if he were paused.
|
15
|
+
|
16
|
+
He's also got a couple of improvements over logstash. Let's discuss!
|
17
|
+
|
18
|
+
|
19
|
+
## Improvements
|
20
|
+
|
21
|
+
First let me say logstash is an awesome hunk of software thanks to the hard
|
22
|
+
work of Jordan Sissel and the entire logstash community. Keep it up!
|
23
|
+
|
24
|
+
### Multiline Flush
|
25
|
+
|
26
|
+
Anyone familiar with multiline codecs and filters in logstash is familiar with
|
27
|
+
the multiline flush issue: You finish writing your log file, you close it and
|
28
|
+
wait for it to make it through logstash, but hold up. Where's the last line?
|
29
|
+
That's right, stuck. Because logstash is expecting a steady stream of events and
|
30
|
+
that last one is being buffered so logstash can decide whether it's a multiline
|
31
|
+
event. Yup, there's an outstanding issue: [LOGSTASH-271](https://logstash.jira.com/browse/LOGSTASH-271).
|
32
|
+
Yup, there's a fix: [Pull #1260](https://github.com/elasticsearch/logstash/pull/1260).
|
33
|
+
But it's not yet officially sanctioned. Such is life. At any rate, you don't
|
34
|
+
have to deal with this issue in Franz, he flushes inactive buffers after a time.
|
35
|
+
Easy-peasy, lemon-squeezy.
|
36
|
+
|
37
|
+
### File Handle Eviction
|
38
|
+
|
39
|
+
Now I'm not actually sure this issue affects logstash proper, but it's one you
|
40
|
+
might face if you decide to write your own, so here goes: If you're tailing a
|
41
|
+
bunch of files and you never let go of their file handles, you might very well
|
42
|
+
exhaust your ulimit after running for a while. Because Franz is designed to be
|
43
|
+
a daemon, he releases or "evicts" file handles after a period of inactivity.
|
44
|
+
|
45
|
+
### Sequential Identifiers
|
46
|
+
|
47
|
+
Okay one last feature: Every log event is assigned a sequential identifier
|
48
|
+
according to its path (and implicitly, host) in the `@seq` field. This is useful
|
49
|
+
if you expect your packets to get criss-crossed and you want to reconstruct the
|
50
|
+
events in order without relying on timestamps, which you shouldn't.
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.2.7
|
data/bin/franz
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'json'
|
3
|
+
require 'thread'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
require 'bunny'
|
7
|
+
require 'trollop'
|
8
|
+
require 'franz'
|
9
|
+
|
10
|
+
|
11
|
+
# If we find a local config, try to use it
# N.B. We'll fall back to the last candidate listed here if none of them exist
candidates = ['config.json']
# The per-user config is built from ENV['HOME'] directly: formatting
# '%{HOME}' with ENV looks the name up as a Symbol, ENV only has String keys,
# and the resulting KeyError used to make this candidate silently disappear.
candidates << File.join(ENV['HOME'], '.franz.json') if ENV['HOME']
candidates << '/etc/franz/franz.json'
config = candidates.find { |path| File.exist? path } || candidates.last
|
22
|
+
|
23
|
+
|
24
|
+
# Franz really only accepts a config file as an option, and that config file
# has got to conform to a certain format. If you're unsure, just look at the
# default options hashes in both Franz::Input and Franz::Output
#
# The parsed command-line options end up in `opts`, keyed by option name
# (:config, :debug, :trace, :log).
opts = Trollop::options(ARGV) do
  version Franz::VERSION
  # Banner: the ASCII art, a blank line, then the summary/usage text below
  banner Franz::ART + "\n\n" + <<-EOS.gsub(/^ /, '')
#{Franz::SUMMARY}

Usage: franz [<options>]

Options:
  EOS
  # --config defaults to the `config` path selected earlier in this script
  opt :config, 'Configuration file to use', type: :string, default: config
  opt :debug, 'Enable debugging output', default: false
  opt :trace, 'Enable trace output', default: false
  # With no --log given the default is nil
  opt :log, 'Log to file, not STDOUT', type: :string, default: nil
end
|
41
|
+
|
42
|
+
Thread.abort_on_exception = true # Die quickly and with great ceremony

config = Franz::Config.new opts[:config]
logger = Franz::Logger.new opts[:debug], opts[:trace], opts[:log]

# Capacity of the queue between input and output; a full queue blocks producers
io_bound = config[:output][:bound] || 10_000

begin
  io = SizedQueue.new io_bound

  # Now we'll connect to our output, RabbitMQ. This creates a new thread in the
  # background, which will consume the events generated by our input on io
  fout = Franz::Output.new \
    input: io,
    output: config[:output][:rabbitmq],
    logger: logger,
    tags: config[:output][:tags]

  # Franz has only one kind of input, plain text files.
  fin = Franz::Input.new \
    input: config[:input],
    output: io,
    logger: logger,
    checkpoint: config[:checkpoint],
    checkpoint_interval: config[:checkpoint_interval]

  # Remember, both the input and output were started up in background threads,
  # so we'll have to wait here in main or else we'll just exit.
  fout.join

# Interrupt is a SignalException, so a single clause covers every signal as
# well as a plain exit; the previous separate Interrupt clause could never run.
rescue SignalException, SystemExit => e
  logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
ensure
  logger.info 'Draining. This may take a while...'
  # fin is still nil when start-up failed before the input was created;
  # calling into it then would raise and mask the original error.
  if fin
    fin.stop
    fin.checkpoint
  end
  logger.info 'Bye!'
end
|
data/franz.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
# Put this checkout's lib/ on the load path so the gem metadata can be loaded
lib_dir = File.expand_path(File.join('..', 'lib'), __FILE__)
$LOAD_PATH.push lib_dir
require 'franz/metadata'

# Packaged files are whatever git tracks at build time
tracked_files = `git ls-files`.split("\n")
tracked_tests = `git ls-files -- test/*`.split("\n")
tracked_bins  = `git ls-files -- bin/*`.split("\n")

# Runtime dependencies and their version constraints
runtime_dependencies = {
  'bunny'              => '~> 1',
  'buftok'             => '~> 0',
  'trollop'            => '~> 2',
  'colorize'           => '~> 0',
  'deep_merge'         => '~> 1',
  'consistent-hashing' => '~> 1'
}

Gem::Specification.new do |s|
  s.name        = 'franz'
  s.version     = Franz::VERSION
  s.platform    = Gem::Platform::RUBY
  s.author      = Franz::AUTHOR
  s.email       = Franz::EMAIL
  s.summary     = Franz::SUMMARY
  s.description = Franz::SUMMARY + '.'

  runtime_dependencies.each do |gem_name, requirement|
    s.add_runtime_dependency gem_name, requirement
  end

  s.files         = tracked_files
  s.test_files    = tracked_tests
  # Executables are listed by bare file name, not by path
  s.executables   = tracked_bins.map { |bin| File::basename(bin) }
  s.require_paths = %w[ lib ]
end
|
data/lib/franz.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require_relative 'franz/agg'
|
2
|
+
require_relative 'franz/config'
|
3
|
+
require_relative 'franz/discover'
|
4
|
+
require_relative 'franz/input'
|
5
|
+
require_relative 'franz/logger'
|
6
|
+
require_relative 'franz/metadata'
|
7
|
+
require_relative 'franz/output'
|
8
|
+
require_relative 'franz/tail'
|
9
|
+
require_relative 'franz/tail_pool'
|
10
|
+
require_relative 'franz/watch'
|
data/lib/franz/agg.rb
ADDED
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'thread'
|
3
|
+
require 'socket'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
require_relative 'sash'
|
7
|
+
|
8
|
+
module Franz

  # Agg mostly aggregates Tail events by applying the multiline filter, but it
  # also applies the "host" and "type" fields. Basically, it does all the post-
  # processing after we've retrieved a line from a file.
  class Agg
    @@host = Socket.gethostname # We'll apply the hostname to all events

    # @return [Hash] the live per-path sequence counters (see #state for a copy)
    attr_reader :seqs

    # Start the Agg threads (capture and flush) in the background.
    #
    # @param [Hash] opts options for the aggregator
    # @option opts [Array<Hash>] :configs ([]) file input configuration
    # @option opts [Queue] :tail_events ([]) "input" queue from Tail
    # @option opts [Queue] :agg_events ([]) "output" queue
    # @option opts [Integer] :flush_interval (10) seconds between flushes
    # @option opts [Hash<Path,Fixnum>] :seqs ({}) internal "seqs" state
    # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
    def initialize opts={}
      @configs        = opts[:configs]        || Array.new
      @tail_events    = opts[:tail_events]    || []
      @agg_events     = opts[:agg_events]     || []
      @flush_interval = opts[:flush_interval] || 10
      @seqs           = opts[:seqs]           || Hash.new
      @logger         = opts[:logger]         || Logger.new(STDOUT)

      @types  = Hash.new             # path => type cache, filled in by #type
      @lock   = Mutex.new            # guards @buffer between the two threads
      @buffer = Franz::Sash.new
      @stop   = false

      # Flush thread: periodically emits multiline buffers that have gone quiet
      @t1 = Thread.new do
        log.debug 'starting agg-flush'
        until @stop
          flush
          sleep flush_interval
        end
        # One last pass after a full interval so nothing is left in the buffer
        sleep flush_interval
        flush
      end

      # Capture thread: consumes Tail events one at a time
      @t2 = Thread.new do
        log.debug 'starting agg-capture'
        until @stop
          capture
        end
      end

      log.debug 'started agg'
    end

    # Stop the Agg thread. Effectively only once.
    #
    # Kills the capture thread and waits for the flush thread to finish.
    #
    # @return [Hash] internal "seqs" state
    def stop
      return state if @stop
      @stop = true
      @t2.kill
      @t1.join
      log.debug 'stopped agg'
      return state
    end

    # Return the internal "seqs" state
    #
    # @return [Hash] a shallow copy of the per-path sequence counters
    def state
      return @seqs.dup
    end

  private
    # :seqs is intentionally not repeated here: declaring it again below
    # `private` would turn the public reader above into a private method.
    attr_reader :configs, :tail_events, :agg_events, :flush_interval, :types, :lock, :buffer

    def log ; @logger end

    # Whether the given input config covers path: at least one :includes glob
    # matches the full path and no :excludes glob matches its basename.
    def matches? config, path
      included = config[:includes].any? { |glob| File.fnmatch? glob, path }
      return false unless included
      excludes = config[:excludes]
      return true if excludes.nil?
      name = File::basename(path)
      excludes.none? { |glob| File.fnmatch? glob, name }
    end

    # Look up (and cache) the type of the first config that covers path.
    #
    # @return [Object, nil] the config's :type, or nil (after logging an error)
    #   when no config covers the path
    def type path
      return @types[path] if @types.key? path
      configs.each do |config|
        next unless matches? config, path
        next if config[:type].nil?
        return @types[path] = config[:type]
      end
      log.error 'Could not identify type for path=%s' % path
      nil
    end

    # The first config whose :type equals the type identified for path, if any.
    def config path
      identified = type path
      return nil if identified.nil?
      configs.find { |c| c[:type] == identified }
    end

    # Bump and return the sequence number for path (the first call yields 1).
    def seq path
      seqs[path] = seqs.fetch(path, 0) + 1
    end

    # Resolve path to its real path, falling back to path itself on any failure.
    def real_path path
      Pathname.new(path).realpath.to_s
    rescue StandardError
      path
    end

    # Push a finished event for path onto the output queue, with invalid or
    # undefined bytes in the message replaced by '?' on the way to UTF-8.
    def enqueue path, message
      resolved = real_path path
      t = type path
      s = seq path
      m = message.encode 'UTF-8', invalid: :replace, undef: :replace, replace: '?'
      log.trace 'enqueue type=%s path=%s seq=%d message=%s' % [
        t.inspect, resolved.inspect, s.inspect, m.inspect
      ]
      agg_events.push path: resolved, message: m, type: t, host: @@host, '@seq' => s
    end

    # Take one event off the Tail queue. Without a multiline pattern the line
    # is enqueued immediately (unless empty); otherwise it is buffered.
    def capture
      event = tail_events.shift
      log.trace 'received path=%s line=%s' % [
        event[:path], event[:line]
      ]
      multiline = config(event[:path])[:multiline]
      if multiline.nil?
        enqueue event[:path], event[:line] unless event[:line].empty?
      else
        lock.synchronize do
          # A line matching the pattern triggers emitting whatever was buffered
          # for this path before the new line itself is buffered.
          if event[:line] =~ multiline
            buffered = buffer.flush(event[:path])
            lines = buffered.map { |e| e[:line] }.join("\n")
            enqueue event[:path], lines unless lines.empty?
          end
          buffer.insert event[:path], event
        end
      end
    end

    # Emit the buffered lines of every path whose buffer has not changed for at
    # least flush_interval seconds.
    def flush
      lock.synchronize do
        started = Time.now
        buffer.keys.each do |path|
          if started - buffer.mtime(path) >= flush_interval
            log.trace 'flushing path=%s' % path.inspect
            buffered = buffer.remove(path)
            lines = buffered.map { |e| e[:line] }.join("\n")
            enqueue path, lines unless lines.empty?
          end
        end
      end
    end
  end
end
|
data/lib/franz/config.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Franz

  # All things configuration.
  class Config

    # Load a config file path into a Hash, converting to some native types where
    # appropriate (e.g. a String denoting a Regexp will become Regexp).
    #
    # Keys are symbolized. For every entry of config[:input][:configs] the
    # :multiline String (when given) is compiled to a Regexp and :type is
    # converted to a Symbol.
    #
    # @param path [String] path to a config file
    #
    # @return [Hash] config compiled into a native Hash
    def self.new path
      config = JSON::parse File.read(path), symbolize_names: true
      config[:input][:configs].each do |input|
        # An explicit `"multiline": null` means "no multiline", exactly like a
        # missing key; compiling nil would raise a TypeError.
        pattern = input[:multiline]
        input[:multiline] = Regexp.new pattern unless pattern.nil?
        input[:type] = input[:type].to_sym
      end
      return config
    end
  end
end
|