franz 1.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 56734aa5910e2ab4f20bca8e21ae5cc76bbcfb9d
4
+ data.tar.gz: 7bb3ea267a09bdeb68b1e0320daacd04c760890a
5
+ SHA512:
6
+ metadata.gz: 58ceaffc0dc31e5fc7ea08c77d38c32eb7ace575f000182bb629d0c69888b4e7539e50e83d8a08cbb84e6d15b68b1cd5a03e55ed75f89ed0f35a225cdae79444
7
+ data.tar.gz: 80c24a9410ea0e4404541f4eecbe2a87becec61fe193758f346a1699d7db145eeba80fde2e79a598df7427421d3ed995a7ad54b77c9a9628bef7146d8a3358a1
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ *~
2
+ *.gem
3
+ *.log
4
+ *.out
5
+ *.pid
6
+ *.swp
7
+ *.state
8
+ *.checkpoint
9
+ .DS_Store
10
+ .yardoc
11
+ doc
12
+ pkg
13
+ config.json
14
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# Fetch gems over TLS so that downloads cannot be tampered with in transit
source 'https://rubygems.org'

# Runtime dependencies are declared in the gemspec
gemspec

# Tooling only needed while hacking on the gem
group :development do
  gem 'pry'
  gem 'rake'
  gem 'yard'
  gem 'version'
  gem 'rubygems-tasks'
end

# Test-only dependencies
group :test do
  gem 'minitest'
end
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2014 Sean Clemmer and Blue Jeans Network
2
+
3
+ Permission to use, copy, modify, and/or distribute this software for any
4
+ purpose with or without fee is hereby granted, provided that the above
5
+ copyright notice and this permission notice appear in all copies.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
8
+ REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
9
+ AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
10
+ INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
11
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
12
+ OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
13
+ PERFORMANCE OF THIS SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,52 @@
1
require 'rubygems'
require 'bundler'
require 'rake'


# Tests: every test/test*.rb file, with lib/ and test/ on the load path
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs.push 'lib', 'test'
  t.test_files = FileList['test/test*.rb']
  t.verbose = true
end

# A bare `rake` runs the test task
task default: :test


# API documentation via YARD
require 'yard'
YARD::Rake::YardocTask.new do |yardoc|
  yardoc.files = ['--readme', 'Readme.md', 'lib/**/*.rb', '-', 'VERSION']
end


# Gem packaging tasks; the console task launches pry
require 'rubygems/tasks'
gem_task_options = { push: false, sign: {} }
Gem::Tasks.new(gem_task_options) do |gem_tasks|
  gem_tasks.console.command = 'pry'
end
Gem::Tasks::Sign::Checksum.new(sha2: true)


# Version tasks backed by the VERSION file
require 'rake/version_task'
Rake::VersionTask.new
34
+
35
+
36
# Builds the gem, then pushes the resulting pkg/franz-<VERSION>.gem to the
# artifact host over anonymous FTP and makes it group-writable.
desc "Upload build artifacts to WOPR"
task :upload => :build do
  pkg_name = 'franz-%s.gem' % File.read('VERSION').strip
  pkg_path = File.join 'pkg', pkg_name

  require 'net/ftp'
  ftp = Net::FTP.new
  ftp.connect '10.4.4.15', 8080
  begin
    # Everything after a successful connect lives inside begin/ensure so the
    # control connection is closed even when login or the transfer fails.
    ftp.login
    # `passive` on its own is only a getter; passive mode has to be assigned.
    ftp.passive = true
    ftp.put pkg_path
    ftp.sendcmd("SITE CHMOD 0664 #{pkg_name}")
  ensure
    ftp.close
  end
end
data/Readme.md ADDED
@@ -0,0 +1,50 @@
1
+ # Franz
2
+
3
+ Franz ships line-oriented log files to [RabbitMQ](http://www.rabbitmq.com/).
4
+ Think barebones [logstash](http://logstash.net/) in pure Ruby with more modest
5
+ compute and memory requirements.
6
+
7
+ This is really to avoid the JVM tax, but you probably still want logstash agents
8
+ doing the bulk of the log processing. Using this setup, RabbitMQ and logstash
9
+ may be scaled and restarted independently, so new configurations may be applied
10
+ without interrupting those precious log hosts.
11
+
12
+ Even so, Franz was designed to be interrupted. Before exiting, Franz drains his
13
+ event queues and writes any "leftover" state to disk. When he's called next, he picks
14
+ up those leftovers and continues as if he were paused.
15
+
16
+ He's also got a couple of improvements over logstash. Let's discuss!
17
+
18
+
19
+ ## Improvements
20
+
21
+ First let me say logstash is an awesome hunk of software thanks to the hard
22
+ work of Jordan Sissel and the entire logstash community. Keep it up!
23
+
24
+ ### Multiline Flush
25
+
26
+ Anyone familiar with multiline codecs and filters in logstash is familiar with
27
+ the multiline flush issue: You finish writing your log file, you close it and
28
+ wait for it to make it through logstash, but hold up. Where's the last line?
29
+ That's right, stuck. Because logstash is expecting a steady stream of events and
30
+ that last one is being buffered so logstash can decide whether it's a multiline
31
+ event. Yup, there's an outstanding issue: [LOGSTASH-271](https://logstash.jira.com/browse/LOGSTASH-271).
32
+ Yup, there's a fix: [Pull #1260](https://github.com/elasticsearch/logstash/pull/1260).
33
+ But it's not yet officially sanctioned. Such is life. At any rate, you don't
34
+ have to deal with this issue in Franz, he flushes inactive buffers after a time.
35
+ Easy-peasy, lemon-squeezy.
36
+
37
+ ### File Handle Eviction
38
+
39
+ Now I'm not actually sure this issue affects logstash proper, but it's one you
40
+ might face if you decide to write your own, so here goes: If you're tailing a
41
+ bunch of files and you never let go of their file handles, you might very well
42
+ exhaust your ulimit after running for a while. Because Franz is designed to be
43
+ a daemon, he releases or "evicts" file handles after a period of inactivity.
44
+
45
+ ### Sequential Identifiers
46
+
47
+ Okay one last feature: Every log event is assigned a sequential identifier
48
+ according to its path (and implicitly, host) in the `@seq` field. This is useful
49
+ if you expect your packets to get criss-crossed and you want to reconstruct the
50
+ events in order without relying on timestamps, which you shouldn't.
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.2.7
data/bin/franz ADDED
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+ require 'json'
3
+ require 'thread'
4
+ require 'logger'
5
+
6
+ require 'bunny'
7
+ require 'trollop'
8
+ require 'franz'
9
+
10
+
11
# Pick a default config path: the first candidate that exists on disk wins.
# N.B. If none exist we are left pointing at the last candidate that could be
# expanded, which then serves as the fallback.
config = nil
[
  'config.json',
  '%{HOME}/.franz.json',
  '/etc/franz/franz.json'
].each do |candidate|
  begin
    # Expand %{NAME} references from the environment; a candidate that cannot
    # be expanded (e.g. HOME is unset) is skipped
    config = candidate % ENV
  rescue StandardError
    next
  end
  break if File.exist? config
end
22
+
23
+
24
# Franz really only accepts a config file as an option, and that config file
# has got to conform to a certain format. If you're unsure, just look at the
# default options hashes in both Franz::Input and Franz::Output
#
# Parses ARGV into the `opts` hash. The :config option defaults to the path
# chosen by the discovery loop above; :debug, :trace and :log are later handed
# to Franz::Logger.
opts = Trollop::options(ARGV) do
  version Franz::VERSION
  # Banner is the ASCII art, a blank line, then the summary and usage text
  banner Franz::ART + "\n\n" + <<-EOS.gsub(/^ /, '')
#{Franz::SUMMARY}

Usage: franz [<options>]

Options:
  EOS
  opt :config, 'Configuration file to use', type: :string, default: config
  opt :debug, 'Enable debugging output', default: false
  opt :trace, 'Enable trace output', default: false
  opt :log, 'Log to file, not STDOUT', type: :string, default: nil
end
41
+
42
Thread.abort_on_exception = true # Die quickly and with great ceremony

# Load the JSON config chosen on the command line and set up logging
config = Franz::Config.new opts[:config]
logger = Franz::Logger.new opts[:debug], opts[:trace], opts[:log]

# Capacity of the queue between input and output (defaults to 10,000 events)
io_bound = config[:output][:bound] || 10_000

# Declared before the begin block so the ensure clause can tell whether the
# input was ever started; startup may fail before it is assigned.
fin = nil

begin
  io = SizedQueue.new io_bound

  # Now we'll connect to our output, RabbitMQ. This creates a new thread in the
  # background, which will consume the events generated by our input on io
  fout = Franz::Output.new \
    input: io,
    output: config[:output][:rabbitmq],
    logger: logger,
    tags: config[:output][:tags]

  # Franz has only one kind of input, plain text files.
  fin = Franz::Input.new \
    input: config[:input],
    output: io,
    logger: logger,
    checkpoint: config[:checkpoint],
    checkpoint_interval: config[:checkpoint_interval]

  # Remember, both the input and output were started up in background threads,
  # so we'll have to wait here in main or else we'll just exit.
  fout.join

# Interrupt is a SignalException, so one clause covers signals and exits alike
rescue SignalException, SystemExit => e
  logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
ensure
  logger.info 'Draining. This may take a while...'
  # Only drain and checkpoint if the input actually came up; otherwise calling
  # into nil here would mask whatever error aborted startup.
  unless fin.nil?
    fin.stop
    fin.checkpoint
  end
  logger.info 'Bye!'
end
data/franz.gemspec ADDED
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make lib/ loadable so the gem metadata constants can be required below
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
require 'franz/metadata'

Gem::Specification.new do |spec|
  # Identity, pulled from Franz's metadata constants
  spec.name        = 'franz'
  spec.version     = Franz::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.author      = Franz::AUTHOR
  spec.email       = Franz::EMAIL
  spec.summary     = Franz::SUMMARY
  spec.description = Franz::SUMMARY + '.'

  # Runtime dependencies, registered in the listed order
  [
    ['bunny',              '~> 1'],
    ['buftok',             '~> 0'],
    ['trollop',            '~> 2'],
    ['colorize',           '~> 0'],
    ['deep_merge',         '~> 1'],
    ['consistent-hashing', '~> 1']
  ].each do |gem_name, requirement|
    spec.add_runtime_dependency gem_name, requirement
  end

  # Package contents come from whatever git tracks
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- test/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |bin_path| File.basename(bin_path) }
  spec.require_paths = ['lib']
end
data/lib/franz.rb ADDED
@@ -0,0 +1,10 @@
1
+ require_relative 'franz/agg'
2
+ require_relative 'franz/config'
3
+ require_relative 'franz/discover'
4
+ require_relative 'franz/input'
5
+ require_relative 'franz/logger'
6
+ require_relative 'franz/metadata'
7
+ require_relative 'franz/output'
8
+ require_relative 'franz/tail'
9
+ require_relative 'franz/tail_pool'
10
+ require_relative 'franz/watch'
data/lib/franz/agg.rb ADDED
@@ -0,0 +1,158 @@
1
+ require 'logger'
2
+ require 'thread'
3
+ require 'socket'
4
+ require 'pathname'
5
+
6
+ require_relative 'sash'
7
+
8
module Franz

  # Agg mostly aggregates Tail events by applying the multiline filter, but it
  # also applies the "host" and "type" fields. Basically, it does all the post-
  # processing after we've retrieved a line from a file.
  class Agg
    @@host = Socket.gethostname # We'll apply the hostname to all events

    # @return [Hash] live per-path sequence counters (use #state for a copy)
    attr_reader :seqs

    # Start a new Agg thread in the background.
    #
    # @param [Hash] opts options for the aggregator
    # @option opts [Array<Hash>] :configs ([]) file input configuration
    # @option opts [Queue] :tail_events ([]) "input" queue from Tail
    # @option opts [Queue] :agg_events ([]) "output" queue
    # @option opts [Integer] :flush_interval (10) seconds between flushes
    # @option opts [Hash<Path,Fixnum>] :seqs ({}) internal "seqs" state
    # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
    def initialize opts={}
      @configs        = opts[:configs]        || Array.new
      @tail_events    = opts[:tail_events]    || []
      @agg_events     = opts[:agg_events]     || []
      @flush_interval = opts[:flush_interval] || 10
      @seqs           = opts[:seqs]           || Hash.new
      @logger         = opts[:logger]         || Logger.new(STDOUT)

      @types  = Hash.new   # path => type cache, filled lazily by #type
      @lock   = Mutex.new  # guards @buffer between the two threads below
      @buffer = Franz::Sash.new
      @stop   = false

      # Flusher: periodically ships multiline buffers that have gone quiet
      @t1 = Thread.new do
        log.debug 'starting agg-flush'
        until @stop
          flush
          sleep flush_interval
        end
        # One more pass after #stop, a full interval later, so buffers that
        # were still fresh at stop time get a chance to be flushed
        sleep flush_interval
        flush
      end

      # Capturer: pulls events off the Tail queue one at a time
      @t2 = Thread.new do
        log.debug 'starting agg-capture'
        until @stop
          capture
        end
      end

      log.debug 'started agg'
    end

    # Stop the Agg thread. Effectively only once.
    #
    # @return [Hash] internal "seqs" state
    def stop
      return state if @stop
      @stop = true
      @t2.kill # capture may be parked waiting on the input queue
      @t1.join # wait for the flusher to finish its final pass
      log.debug 'stopped agg'
      return state
    end

    # Return the internal "seqs" state
    #
    # @return [Hash] a shallow copy of the per-path sequence counters
    def state
      return @seqs.dup
    end

  private
    # N.B. :seqs is deliberately absent here: listing it below `private` would
    # revoke the public reader declared at the top of the class.
    attr_reader :configs, :tail_events, :agg_events, :flush_interval, :types, :lock, :buffer

    def log ; @logger end

    # Look up (and cache) the configured type for a path. A config matches
    # when any of its :includes globs matches the full path and none of its
    # :excludes globs matches the path's basename.
    #
    # @return [Object, nil] the matching config's :type, or nil (after logging
    #   an error) when no config claims the path
    def type path
      return @types[path] if @types.key? path
      configs.each do |config|
        excludes = config[:excludes]
        excluded = !excludes.nil? && excludes.any? { |pattern|
          File.fnmatch? pattern, File::basename(path)
        }
        next if excluded
        included = config[:includes].any? { |glob| File.fnmatch? glob, path }
        return @types[path] = config[:type] if included && !config[:type].nil?
      end
      log.error 'Could not identify type for path=%s' % path
      nil
    end

    # First input config whose :type matches the path's type, or nil
    def config path
      wanted = type path
      configs.find { |c| c[:type] == wanted }
    end

    # Bump and return the sequence number for a path (the first value is 1)
    def seq path
      seqs[path] = seqs.fetch(path, 0) + 1
    end

    # Resolve symlinks where possible; fall back to the path as given
    def real_path path
      Pathname.new(path).realpath.to_s rescue path
    end

    # Push a finished event onto the output queue, tagged with type, host and
    # sequence number. The message is coerced to UTF-8, with invalid or
    # undefined bytes replaced by '?'.
    def enqueue path, message
      p = real_path path
      t = type path
      s = seq path
      m = message.encode 'UTF-8', invalid: :replace, undef: :replace, replace: '?'
      log.trace 'enqueue type=%s path=%s seq=%d message=%s' % [
        t.inspect, p.inspect, s.inspect, m.inspect
      ]
      agg_events.push path: p, message: m, type: t, host: @@host, '@seq' => s
    end

    # Take one event from Tail. Without a :multiline pattern, non-empty lines
    # are enqueued straight away. With one, a line matching the pattern flushes
    # whatever is buffered for that path as a single event, and the line itself
    # is then buffered.
    #
    # NOTE(review): raises NoMethodError if no config matches the event's path
    # (#config returns nil) -- presumably Discover only yields configured
    # paths; confirm.
    def capture
      event = tail_events.shift
      log.trace 'received path=%s line=%s' % [
        event[:path], event[:line]
      ]
      multiline = config(event[:path])[:multiline]
      if multiline.nil?
        enqueue event[:path], event[:line] unless event[:line].empty?
      else
        lock.synchronize do
          if event[:line] =~ multiline
            buffered = buffer.flush(event[:path])
            lines = buffered.map { |e| e[:line] }.join("\n")
            enqueue event[:path], lines unless lines.empty?
          end
          buffer.insert event[:path], event
        end
      end
    end

    # Ship buffers whose mtime is at least flush_interval seconds old
    def flush
      lock.synchronize do
        started = Time.now
        buffer.keys.each do |path|
          if started - buffer.mtime(path) >= flush_interval
            log.trace 'flushing path=%s' % path.inspect
            buffered = buffer.remove(path)
            lines = buffered.map { |e| e[:line] }.join("\n")
            enqueue path, lines unless lines.empty?
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'json'
2
+
3
module Franz

  # All things configuration.
  class Config

    # Read a JSON config file and return it as a Hash with symbol keys. Each
    # entry under [:input][:configs] is adjusted in place: a :multiline String
    # is compiled into a Regexp and :type is turned into a Symbol.
    #
    # @param path [String] path to a config file
    #
    # @return [Hash] config compiled into a native Hash
    def self.new path
      contents = File.read path
      parsed = JSON.parse contents, symbolize_names: true
      parsed[:input][:configs].each do |entry|
        entry[:multiline] = Regexp.new(entry[:multiline]) if entry.key? :multiline
        entry[:type] = entry[:type].to_sym
      end
      parsed
    end
  end
end