franz 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +15 -0
- data/LICENSE +13 -0
- data/Rakefile +52 -0
- data/Readme.md +50 -0
- data/VERSION +1 -0
- data/bin/franz +81 -0
- data/franz.gemspec +25 -0
- data/lib/franz.rb +10 -0
- data/lib/franz/agg.rb +158 -0
- data/lib/franz/config.rb +24 -0
- data/lib/franz/discover.rb +108 -0
- data/lib/franz/input.rb +174 -0
- data/lib/franz/logger.rb +66 -0
- data/lib/franz/metadata.rb +33 -0
- data/lib/franz/output.rb +81 -0
- data/lib/franz/sash.rb +81 -0
- data/lib/franz/tail.rb +191 -0
- data/lib/franz/tail_pool.rb +68 -0
- data/lib/franz/watch.rb +180 -0
- data/test/test_franz_agg.rb +97 -0
- data/test/test_franz_discover.rb +88 -0
- data/test/test_franz_tail.rb +132 -0
- data/test/test_franz_watch.rb +144 -0
- metadata +155 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 56734aa5910e2ab4f20bca8e21ae5cc76bbcfb9d
|
4
|
+
data.tar.gz: 7bb3ea267a09bdeb68b1e0320daacd04c760890a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 58ceaffc0dc31e5fc7ea08c77d38c32eb7ace575f000182bb629d0c69888b4e7539e50e83d8a08cbb84e6d15b68b1cd5a03e55ed75f89ed0f35a225cdae79444
|
7
|
+
data.tar.gz: 80c24a9410ea0e4404541f4eecbe2a87becec61fe193758f346a1699d7db145eeba80fde2e79a598df7427421d3ed995a7ad54b77c9a9628bef7146d8a3358a1
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2014 Sean Clemmer and Blue Jeans Network
|
2
|
+
|
3
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
4
|
+
purpose with or without fee is hereby granted, provided that the above
|
5
|
+
copyright notice and this permission notice appear in all copies.
|
6
|
+
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
8
|
+
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
9
|
+
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
10
|
+
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
11
|
+
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
12
|
+
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
13
|
+
PERFORMANCE OF THIS SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
require 'rake'
|
4
|
+
|
5
|
+
|
6
|
+
require 'rake/testtask'
|
7
|
+
Rake::TestTask.new(:test) do |test|
|
8
|
+
test.libs << 'lib' << 'test'
|
9
|
+
test.test_files = FileList['test/test*.rb']
|
10
|
+
test.verbose = true
|
11
|
+
end
|
12
|
+
|
13
|
+
task :default => :test
|
14
|
+
|
15
|
+
|
16
|
+
require 'yard'
|
17
|
+
YARD::Rake::YardocTask.new do |t|
|
18
|
+
t.files = %w[ --readme Readme.md lib/**/*.rb - VERSION ]
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
require 'rubygems/tasks'
|
23
|
+
Gem::Tasks.new({
|
24
|
+
push: false,
|
25
|
+
sign: {}
|
26
|
+
}) do |tasks|
|
27
|
+
tasks.console.command = 'pry'
|
28
|
+
end
|
29
|
+
Gem::Tasks::Sign::Checksum.new sha2: true
|
30
|
+
|
31
|
+
|
32
|
+
require 'rake/version_task'
|
33
|
+
Rake::VersionTask.new
|
34
|
+
|
35
|
+
|
36
|
+
desc "Upload build artifacts to WOPR"
|
37
|
+
task :upload => :build do
|
38
|
+
pkg_name = 'franz-%s.gem' % File.read('VERSION').strip
|
39
|
+
pkg_path = File.join 'pkg', pkg_name
|
40
|
+
|
41
|
+
require 'net/ftp'
|
42
|
+
ftp = Net::FTP.new
|
43
|
+
ftp.connect '10.4.4.15', 8080
|
44
|
+
ftp.login
|
45
|
+
# Net::FTP#passive is only a reader; assign to actually enable passive mode
ftp.passive = true
|
46
|
+
begin
|
47
|
+
ftp.put pkg_path
|
48
|
+
ftp.sendcmd("SITE CHMOD 0664 #{pkg_name}")
|
49
|
+
ensure
|
50
|
+
ftp.close
|
51
|
+
end
|
52
|
+
end
|
data/Readme.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# Franz
|
2
|
+
|
3
|
+
Franz ships line-oriented log files to [RabbitMQ](http://www.rabbitmq.com/).
|
4
|
+
Think barebones [logstash](http://logstash.net/) in pure Ruby with more modest
|
5
|
+
compute and memory requirements.
|
6
|
+
|
7
|
+
This is really to avoid the JVM tax, but you probably still want logstash agents
|
8
|
+
doing the bulk of the log processing. Using this setup, RabbitMQ and logstash
|
9
|
+
may be scaled and restarted independently, so new configurations may be applied
|
10
|
+
without interrupting those precious log hosts.
|
11
|
+
|
12
|
+
Even so, Franz was designed to be interrupted. Before exiting, Franz drains his
|
13
|
+
event queues and writes any "leftover" state to disk. When he's called next, he picks
|
14
|
+
up those leftovers and continues as if he were paused.
|
15
|
+
|
16
|
+
He's also got a couple of improvements over logstash. Let's discuss!
|
17
|
+
|
18
|
+
|
19
|
+
## Improvements
|
20
|
+
|
21
|
+
First let me say logstash is an awesome hunk of software thanks to the hard
|
22
|
+
work of Jordan Sissel and the entire logstash community. Keep it up!
|
23
|
+
|
24
|
+
### Multiline Flush
|
25
|
+
|
26
|
+
Anyone familiar with multiline codecs and filters in logstash is familiar with
|
27
|
+
the multiline flush issue: You finish writing your log file, you close it and
|
28
|
+
wait for it to make it through logstash, but hold up. Where's the last line?
|
29
|
+
That's right, stuck. Because logstash is expecting a steady stream of events and
|
30
|
+
that last one is being buffered so logstash can decide whether it's a multiline
|
31
|
+
event. Yup, there's an outstanding issue: [LOGSTASH-271](https://logstash.jira.com/browse/LOGSTASH-271).
|
32
|
+
Yup, there's a fix: [Pull #1260](https://github.com/elasticsearch/logstash/pull/1260).
|
33
|
+
But it's not yet officially sanctioned. Such is life. At any rate, you don't
|
34
|
+
have to deal with this issue in Franz, he flushes inactive buffers after a time.
|
35
|
+
Easy-peasy, lemon-squeezy.
|
36
|
+
|
37
|
+
### File Handle Eviction
|
38
|
+
|
39
|
+
Now I'm not actually sure this issue affects logstash proper, but it's one you
|
40
|
+
might face if you decide to write your own, so here goes: If you're tailing a
|
41
|
+
bunch of files and you never let go of their file handles, you might very well
|
42
|
+
exhaust your ulimit after running for a while. Because Franz is designed to be
|
43
|
+
a daemon, he releases or "evicts" file handles after a period of inactivity.
|
44
|
+
|
45
|
+
### Sequential Identifiers
|
46
|
+
|
47
|
+
Okay one last feature: Every log event is assigned a sequential identifier
|
48
|
+
according to its path (and implicitly, host) in the `@seq` field. This is useful
|
49
|
+
if you expect your packets to get criss-crossed and you want to reconstruct the
|
50
|
+
events in order without relying on timestamps, which you shouldn't.
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.2.7
|
data/bin/franz
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'json'
|
3
|
+
require 'thread'
|
4
|
+
require 'logger'
|
5
|
+
|
6
|
+
require 'bunny'
|
7
|
+
require 'trollop'
|
8
|
+
require 'franz'
|
9
|
+
|
10
|
+
|
11
|
+
# If we find a local config, try to use it
|
12
|
+
# N.B. We'll fall back to the last guy listed here
|
13
|
+
config = nil
|
14
|
+
%w[
|
15
|
+
config.json
|
16
|
+
%{HOME}/.franz.json
|
17
|
+
/etc/franz/franz.json
|
18
|
+
].each do |path|
|
19
|
+
# String#% resolves %{NAME} with Symbol keys, but ENV only has String keys, so
# hand it a symbol-keyed copy; a path naming an unset variable is still skipped
config = path % Hash[ENV.map { |name, value| [name.to_sym, value] }] rescue next
|
20
|
+
break if File.exist? config
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
# Franz really only accepts a config file as an option, and that config file
|
25
|
+
# has got to conform to a certain format. If you're unsure, just look at the
|
26
|
+
# default options hashes in both Franz::Input and Franz::Output
|
27
|
+
opts = Trollop::options(ARGV) do
|
28
|
+
version Franz::VERSION
|
29
|
+
banner Franz::ART + "\n\n" + <<-EOS.gsub(/^ /, '')
|
30
|
+
#{Franz::SUMMARY}
|
31
|
+
|
32
|
+
Usage: franz [<options>]
|
33
|
+
|
34
|
+
Options:
|
35
|
+
EOS
|
36
|
+
opt :config, 'Configuration file to use', type: :string, default: config
|
37
|
+
opt :debug, 'Enable debugging output', default: false
|
38
|
+
opt :trace, 'Enable trace output', default: false
|
39
|
+
opt :log, 'Log to file, not STDOUT', type: :string, default: nil
|
40
|
+
end
|
41
|
+
|
42
|
+
Thread.abort_on_exception = true # Die quickly and with great ceremony
|
43
|
+
|
44
|
+
config = Franz::Config.new opts[:config]
|
45
|
+
logger = Franz::Logger.new opts[:debug], opts[:trace], opts[:log]
|
46
|
+
|
47
|
+
io_bound = config[:output][:bound] || 10_000
|
48
|
+
|
49
|
+
begin
|
50
|
+
io = SizedQueue.new io_bound
|
51
|
+
|
52
|
+
# Now we'll connect to our output, RabbitMQ. This creates a new thread in the
|
53
|
+
# background, which will consume the events generated by our input on io
|
54
|
+
fout = Franz::Output.new \
|
55
|
+
input: io,
|
56
|
+
output: config[:output][:rabbitmq],
|
57
|
+
logger: logger,
|
58
|
+
tags: config[:output][:tags]
|
59
|
+
|
60
|
+
# Franz has only one kind of input, plain text files.
|
61
|
+
fin = Franz::Input.new \
|
62
|
+
input: config[:input],
|
63
|
+
output: io,
|
64
|
+
logger: logger,
|
65
|
+
checkpoint: config[:checkpoint],
|
66
|
+
checkpoint_interval: config[:checkpoint_interval]
|
67
|
+
|
68
|
+
# Remember, both the input and output were started up in background threads,
|
69
|
+
# so we'll have to wait here in main or else we'll just exit.
|
70
|
+
fout.join
|
71
|
+
|
72
|
+
rescue SignalException => e
|
73
|
+
logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
|
74
|
+
rescue SystemExit, Interrupt => e
|
75
|
+
logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
|
76
|
+
ensure
|
77
|
+
logger.info 'Draining. This may take a while...'
|
78
|
+
# fin is nil when startup failed before the input was created; calling into it
# here would raise NoMethodError and mask the original exception
fin.stop if fin
|
79
|
+
# Only checkpoint when the input was actually started (fin is nil otherwise)
fin.checkpoint if fin
|
80
|
+
logger.info 'Bye!'
|
81
|
+
end
|
data/franz.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path(File.join('..', 'lib'), __FILE__)
|
3
|
+
require 'franz/metadata'
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = 'franz'
|
7
|
+
s.version = Franz::VERSION
|
8
|
+
s.platform = Gem::Platform::RUBY
|
9
|
+
s.author = Franz::AUTHOR
|
10
|
+
s.email = Franz::EMAIL
|
11
|
+
s.summary = Franz::SUMMARY
|
12
|
+
s.description = Franz::SUMMARY + '.'
|
13
|
+
|
14
|
+
s.add_runtime_dependency 'bunny', '~> 1'
|
15
|
+
s.add_runtime_dependency 'buftok', '~> 0'
|
16
|
+
s.add_runtime_dependency 'trollop', '~> 2'
|
17
|
+
s.add_runtime_dependency 'colorize', '~> 0'
|
18
|
+
s.add_runtime_dependency 'deep_merge', '~> 1'
|
19
|
+
s.add_runtime_dependency 'consistent-hashing', '~> 1'
|
20
|
+
|
21
|
+
s.files = `git ls-files`.split("\n")
|
22
|
+
s.test_files = `git ls-files -- test/*`.split("\n")
|
23
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File::basename(f) }
|
24
|
+
s.require_paths = %w[ lib ]
|
25
|
+
end
|
data/lib/franz.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require_relative 'franz/agg'
|
2
|
+
require_relative 'franz/config'
|
3
|
+
require_relative 'franz/discover'
|
4
|
+
require_relative 'franz/input'
|
5
|
+
require_relative 'franz/logger'
|
6
|
+
require_relative 'franz/metadata'
|
7
|
+
require_relative 'franz/output'
|
8
|
+
require_relative 'franz/tail'
|
9
|
+
require_relative 'franz/tail_pool'
|
10
|
+
require_relative 'franz/watch'
|
data/lib/franz/agg.rb
ADDED
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'thread'
|
3
|
+
require 'socket'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
require_relative 'sash'
|
7
|
+
|
8
|
+
module Franz
|
9
|
+
|
10
|
+
# Agg mostly aggregates Tail events by applying the multiline filter, but it
|
11
|
+
# also applies the "host" and "type" fields. Basically, it does all the post-
|
12
|
+
# processing after we've retrieved a line from a file.
|
13
|
+
class Agg
|
14
|
+
@@host = Socket.gethostname # We'll apply the hostname to all events
|
15
|
+
|
16
|
+
attr_reader :seqs
|
17
|
+
|
18
|
+
# Start a new Agg thread in the background.
|
19
|
+
#
|
20
|
+
# @param [Hash] opts options for the aggregator
|
21
|
+
# @option opts [Array<Hash>] :configs ([]) file input configuration
|
22
|
+
# @option opts [Queue] :tail_events ([]) "input" queue from Tail
|
23
|
+
# @option opts [Queue] :agg_events ([]) "output" queue
|
24
|
+
# @option opts [Integer] :flush_interval (10) seconds between flushes
|
25
|
+
# @option opts [Hash<Path,Fixnum>] :seqs ({}) internal "seqs" state
|
26
|
+
# @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
|
27
|
+
def initialize opts={}
|
28
|
+
@configs = opts[:configs] || Array.new
|
29
|
+
@tail_events = opts[:tail_events] || []
|
30
|
+
@agg_events = opts[:agg_events] || []
|
31
|
+
@flush_interval = opts[:flush_interval] || 10
|
32
|
+
@seqs = opts[:seqs] || Hash.new
|
33
|
+
@logger = opts[:logger] || Logger.new(STDOUT)
|
34
|
+
|
35
|
+
@types = Hash.new
|
36
|
+
@lock = Mutex.new
|
37
|
+
@buffer = Franz::Sash.new
|
38
|
+
@stop = false
|
39
|
+
|
40
|
+
@t1 = Thread.new do
|
41
|
+
log.debug 'starting agg-flush'
|
42
|
+
until @stop
|
43
|
+
flush
|
44
|
+
sleep flush_interval
|
45
|
+
end
|
46
|
+
sleep flush_interval
|
47
|
+
flush
|
48
|
+
end
|
49
|
+
|
50
|
+
@t2 = Thread.new do
|
51
|
+
log.debug 'starting agg-capture'
|
52
|
+
until @stop
|
53
|
+
capture
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
log.debug 'started agg'
|
58
|
+
end
|
59
|
+
|
60
|
+
# Stop the Agg thread. Effectively only once.
|
61
|
+
#
|
62
|
+
# @return [Hash] internal "seqs" state
|
63
|
+
def stop
|
64
|
+
return state if @stop
|
65
|
+
@stop = true
|
66
|
+
@t2.kill
|
67
|
+
@t1.join
|
68
|
+
log.debug 'stopped agg'
|
69
|
+
return state
|
70
|
+
end
|
71
|
+
|
72
|
+
# Return the internal "seqs" state
|
73
|
+
def state
|
74
|
+
return @seqs.dup
|
75
|
+
end
|
76
|
+
|
77
|
+
private
|
78
|
+
# Internal readers. :seqs is deliberately not listed: it already has a public
# reader declared at the top of the class, and re-declaring it after `private`
# would turn that public reader private.
attr_reader :configs, :tail_events, :agg_events, :flush_interval, :types, :lock, :buffer
|
79
|
+
|
80
|
+
def log ; @logger end
|
81
|
+
|
82
|
+
def type path
|
83
|
+
begin
|
84
|
+
@types.fetch path
|
85
|
+
rescue KeyError
|
86
|
+
configs.each do |config|
|
87
|
+
type = config[:type] if config[:includes].any? { |glob|
|
88
|
+
included = File.fnmatch? glob, path
|
89
|
+
excludes = !config[:excludes].nil?
|
90
|
+
excluded = excludes && config[:excludes].any? { |exlude|
|
91
|
+
File.fnmatch? exlude, File::basename(path)
|
92
|
+
}
|
93
|
+
included && !excluded
|
94
|
+
}
|
95
|
+
return @types[path] = type unless type.nil?
|
96
|
+
end
|
97
|
+
log.error 'Could not identify type for path=%s' % path
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def config path
|
102
|
+
# First input config whose :type matches this path's type (nil when none does)
configs.find { |c| c[:type] == type(path) }
|
103
|
+
end
|
104
|
+
|
105
|
+
def seq path
|
106
|
+
seqs[path] = seqs.fetch(path, 0) + 1
|
107
|
+
end
|
108
|
+
|
109
|
+
def real_path path
|
110
|
+
Pathname.new(path).realpath.to_s rescue path
|
111
|
+
end
|
112
|
+
|
113
|
+
def enqueue path, message
|
114
|
+
p = real_path path
|
115
|
+
t = type path
|
116
|
+
s = seq path
|
117
|
+
m = message.encode 'UTF-8', invalid: :replace, undef: :replace, replace: '?'
|
118
|
+
log.trace 'enqueue type=%s path=%s seq=%d message=%s' % [
|
119
|
+
t.inspect, p.inspect, s.inspect, m.inspect
|
120
|
+
]
|
121
|
+
agg_events.push path: p, message: m, type: t, host: @@host, '@seq' => s
|
122
|
+
end
|
123
|
+
|
124
|
+
def capture
|
125
|
+
event = tail_events.shift
|
126
|
+
log.trace 'received path=%s line=%s' % [
|
127
|
+
event[:path], event[:line]
|
128
|
+
]
|
129
|
+
multiline = config(event[:path])[:multiline]
|
130
|
+
if multiline.nil?
|
131
|
+
enqueue event[:path], event[:line] unless event[:line].empty?
|
132
|
+
else
|
133
|
+
lock.synchronize do
|
134
|
+
if event[:line] =~ multiline
|
135
|
+
buffered = buffer.flush(event[:path])
|
136
|
+
lines = buffered.map { |e| e[:line] }.join("\n")
|
137
|
+
enqueue event[:path], lines unless lines.empty?
|
138
|
+
end
|
139
|
+
buffer.insert event[:path], event
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def flush
|
145
|
+
lock.synchronize do
|
146
|
+
started = Time.now
|
147
|
+
buffer.keys.each do |path|
|
148
|
+
if started - buffer.mtime(path) >= flush_interval
|
149
|
+
log.trace 'flushing path=%s' % path.inspect
|
150
|
+
buffered = buffer.remove(path)
|
151
|
+
lines = buffered.map { |e| e[:line] }.join("\n")
|
152
|
+
enqueue path, lines unless lines.empty?
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
data/lib/franz/config.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Franz
|
4
|
+
|
5
|
+
# All things configuration.
|
6
|
+
class Config
|
7
|
+
|
8
|
+
# Load a config file path into a Hash, converting to some native types where
|
9
|
+
# appropriate (e.g. a String denoting a Regexp will become Regexp).
|
10
|
+
#
|
11
|
+
# @param path [String] path to a config file
|
12
|
+
#
|
13
|
+
# @return [Hash] config compiled into a native Hash
|
14
|
+
def self.new path
|
15
|
+
config = JSON::parse File.read(path), symbolize_names: true
|
16
|
+
config[:input][:configs].map! do |input|
|
17
|
+
input[:multiline] = Regexp.new input[:multiline] if input.has_key?(:multiline)
|
18
|
+
input[:type] = input[:type].to_sym
|
19
|
+
input
|
20
|
+
end
|
21
|
+
return config
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|