franz 1.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
+ require 'logger'
2
+
3
+
4
module Franz

  # Discover performs half of file existence detection by expanding globs and
  # keeping track of files known to Franz. Discover requires a deletions Queue to
  # maintain this state, so it's fairly useless without a Watch.
  class Discover

    # Start a new Discover thread in the background.
    #
    # @param [Hash] opts options for the discovery
    # @option opts [Array<Hash>] :configs ([]) file input configuration; each
    #   Hash may carry :includes (path globs) and :excludes (basename patterns),
    #   both of which default to []
    # @option opts [Queue] :discoveries ([]) "output" queue of discovered paths
    # @option opts [Queue] :deletions ([]) "input" queue of deleted paths
    # @option opts [Numeric] :discover_interval (30) seconds between discover rounds
    # @option opts [Integer] :ignore_before (0) skip files whose mtime, in
    #   epoch seconds, is at or before this value
    # @option opts [Array<Path>] :known ([]) internal "known" state
    # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
    def initialize opts={}
      @configs           = opts[:configs]           || []
      @discoveries       = opts[:discoveries]       || []
      @deletions         = opts[:deletions]         || []
      @discover_interval = opts[:discover_interval] || 30
      @ignore_before     = opts[:ignore_before]     || 0
      @known             = opts[:known]             || []
      # Pinned to the stdlib class so the default does not depend on whether
      # Franz::Logger happens to be loaded.
      @logger            = opts[:logger]            || ::Logger.new(STDOUT)

      # Make sure every config can be iterated without nil checks.
      @configs = configs.map do |config|
        config[:includes] ||= []
        config[:excludes] ||= []
        config
      end

      @stop = false

      @thread = Thread.new do
        log.debug 'starting dicover-delete'
        until @stop
          # Forget anything reported as deleted, so it can be rediscovered.
          until deletions.empty?
            d = deletions.pop
            @known.delete d
            log.debug 'deleted: %s' % d.inspect
          end
          discover.each do |discovery|
            discoveries.push discovery
            @known.push discovery
            log.debug 'discovered: %s' % discovery.inspect
          end
          sleep discover_interval
        end
      end

      log.debug 'started discover'
    end

    # Stop the Discover thread. Effectively only once. Blocks until the
    # current round (including its sleep) has finished.
    #
    # @return [Array] internal "known" state
    def stop
      return state if @stop
      @stop = true
      @thread.join
      log.debug 'stopped discover'
      return state
    end

    # Return the internal "known" state
    #
    # @return [Array] a shallow copy of the known paths
    def state
      return @known.dup
    end

  private
    attr_reader :configs, :discoveries, :deletions, :discover_interval, :known

    def log ; @logger end

    # Run one discovery round over every configured include glob.
    #
    # @return [Array<String>] paths that are new this round, each listed once
    def discover
      discovered = []
      configs.each do |config|
        config[:includes].each do |glob|
          expand(glob).each do |path|
            next if excluded? config, path
            next if known.include? path
            # Overlapping globs may yield the same path more than once in a
            # single round; report it only the first time.
            next if discovered.include? path
            next unless fresh_file? path
            discovered.push path
          end
        end
      end
      return discovered
    end

    # True if the basename of path matches any of the config's excludes.
    def excluded? config, path
      name = File.basename(path)
      config[:excludes].any? { |exclude| File.fnmatch? exclude, name }
    end

    # True if path is a regular file modified after @ignore_before. A file
    # that vanishes between listing and stat is treated as not discoverable
    # instead of raising inside the background thread.
    def fresh_file? path
      File.file?(path) && File.mtime(path).to_i > @ignore_before
    rescue SystemCallError
      false
    end

    # Expand a glob into candidate paths. The directory part is expanded with
    # Dir.glob; the file part is matched against each directory entry with
    # File.fnmatch?.
    #
    # @param [String] glob path glob, e.g. "/var/log/*/app.*.log"
    # @return [Array<String>] matching paths (not yet checked to be files)
    def expand glob
      dir_glob  = File.dirname(glob)
      file_glob = File.basename(glob)
      files = []
      Dir.glob(dir_glob).each do |dir|
        next unless File.directory?(dir)
        begin
          Dir.foreach(dir) do |fname|
            next if fname == '.' || fname == '..'
            next unless File.fnmatch?(file_glob, fname)
            files << File.join(dir, fname)
          end
        rescue SystemCallError
          # Directory vanished or became unreadable mid-round; skip it.
          next
        end
      end
      files
    end
  end
end
@@ -0,0 +1,174 @@
1
+ require 'logger'
2
+ require 'fileutils'
3
+
4
+ require 'deep_merge'
5
+
6
+ require_relative 'agg'
7
+ require_relative 'tail'
8
+ require_relative 'tail_pool'
9
+ require_relative 'watch'
10
+ require_relative 'discover'
11
+
12
module Franz

  # File input for Franz. Really, the only input for Franz, so I hope you like it.
  class Input
    # Start a new input in the background. We'll generate a stream of events by
    # watching the filesystem for changes (Franz::Discover and Franz::Watch),
    # tailing files (Franz::Tail), and generating events (Franz::Agg)
    #
    # @param [Hash] opts options for the aggregator
    # @option opts [Hash] :input ({}) "input" configuration; recognised keys
    #   are :configs, :ignore_before, :tail_pool_size, :discover_bound,
    #   :watch_bound, :tail_bound, :discover_interval, :watch_interval,
    #   :eviction_interval and :flush_interval
    # @option opts [Queue] :output ([]) "output" queue
    # @option opts [Path] :checkpoint ('franz.*.checkpoint') checkpoint path
    #   glob; the "*" is replaced by the epoch time when a checkpoint is written
    # @option opts [Integer] :checkpoint_interval (30) seconds between checkpoints
    # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
    def initialize opts={}
      opts = {
        checkpoint: 'franz.*.checkpoint',
        checkpoint_interval: 30,
        logger: Logger.new(STDOUT),
        output: [],
        input: {
          ignore_before: 0,
          tail_pool_size: 10,
          discover_bound: 10_000,
          watch_bound: 1_000,
          tail_bound: 1_000,
          discover_interval: nil,
          watch_interval: nil,
          eviction_interval: nil,
          flush_interval: nil,
          configs: []
        }
      }.deep_merge!(opts)

      @logger = opts[:logger]

      @checkpoint_interval = opts[:checkpoint_interval]
      @checkpoint_path     = opts[:checkpoint].sub('*', '%d')
      @checkpoint_glob     = opts[:checkpoint]

      # The checkpoint contains a Marshalled Hash with a compact representation of
      # stateful inputs to various Franz streaming classes (e.g. the "known" option
      # to Franz::Discover). This state file is written at regular intervals by
      # the checkpoint thread started below.
      checkpoints = Dir[@checkpoint_glob].sort_by { |path| File.mtime path }
      checkpoints = checkpoints.reject { |path| File.zero? path }
      last_checkpoint_path = checkpoints.pop
      state = nil
      unless last_checkpoint_path.nil?
        last_checkpoint = File.read(last_checkpoint_path)
        # NOTE(review): Marshal.load can instantiate arbitrary objects, so the
        # checkpoint file must only ever be writable by a trusted user.
        state = Marshal.load last_checkpoint
        log.debug 'Loaded %s' % last_checkpoint_path.inspect
      end

      # Split the combined per-path state back into the pieces each stage wants.
      state = state || {}
      known = state.keys
      stats, cursors, seqs = {}, {}, {}
      known.each do |path|
        cursor = state[path].delete :cursor
        seq    = state[path].delete :seq
        cursors[path] = cursor unless cursor.nil?
        seqs[path]    = seq    unless seq.nil?
        stats[path]   = state[path]
      end

      log.debug 'starting input...'

      discoveries  = SizedQueue.new opts[:input][:discover_bound]
      deletions    = SizedQueue.new opts[:input][:discover_bound]
      watch_events = SizedQueue.new opts[:input][:watch_bound]
      tail_events  = SizedQueue.new opts[:input][:tail_bound]

      log.debug 'starting discover...'
      @discover = Franz::Discover.new \
        discoveries: discoveries,
        deletions: deletions,
        configs: opts[:input][:configs],
        discover_interval: opts[:input][:discover_interval],
        ignore_before: opts[:input][:ignore_before],
        logger: opts[:logger],
        known: known

      log.debug 'starting tail...'
      @tail = Franz::Tail.new \
        watch_events: watch_events,
        tail_events: tail_events,
        eviction_interval: opts[:input][:eviction_interval],
        logger: opts[:logger],
        cursors: cursors

      log.debug 'starting agg...'
      @agg = Franz::Agg.new \
        configs: opts[:input][:configs],
        tail_events: tail_events,
        agg_events: opts[:output],
        flush_interval: opts[:input][:flush_interval],
        logger: opts[:logger],
        seqs: seqs

      log.debug 'starting watch...'
      @watch = Franz::Watch.new \
        discoveries: discoveries,
        deletions: deletions,
        watch_events: watch_events,
        watch_interval: opts[:input][:watch_interval],
        logger: opts[:logger],
        stats: stats

      @stop = false
      @t = Thread.new do
        log.debug 'starting checkpoint'
        until @stop
          checkpoint
          sleep @checkpoint_interval
        end
      end

      log.debug 'started input'
    end

    # Stop everything. Has the effect of draining all the Queues and waiting on
    # auxiliary threads (e.g. eviction) to complete full intervals, so it may
    # ordinarily take tens of seconds, depending on your configuration.
    #
    # @return [Hash] compact internal state
    def stop
      return state if @stop
      @stop = true
      @t.join
      # Stop the producer first, while Watch is still consuming discoveries,
      # so the Discover thread cannot be left blocked on a full queue.
      @discover.stop
      @watch.stop
      @tail.stop
      @agg.stop
      log.debug 'stopped input'
      return state
    end

    # Return a compact representation of internal state
    #
    # @return [Hash] Watch's state keyed by path, with each entry given
    #   :cursor (from Tail) and :seq (from Agg) keys, nil when absent
    def state
      stats   = @watch.state
      cursors = @tail.state
      seqs    = @agg.state
      stats.keys.each do |path|
        stats[path] ||= {}
        stats[path][:cursor] = cursors.fetch(path, nil)
        stats[path][:seq]    = seqs.fetch(path, nil)
      end
      return stats
    end

    # Write a checkpoint file given the current state
    def checkpoint
      old_checkpoints = Dir[@checkpoint_glob].sort_by { |p| File.mtime p }
      path = @checkpoint_path % Time.now
      File.open(path, 'w') { |f| f.write Marshal.dump(state) }
      old_checkpoints.pop # Keep last two checkpoints
      old_checkpoints.each { |c| FileUtils.rm c }
      log.info 'Wrote %s' % path.inspect
    end

  private
    def log ; @logger end
  end
end
@@ -0,0 +1,66 @@
1
+ require 'logger'
2
+ require 'time'
3
+
4
+ require 'colorize'
5
+
6
module Franz

  # Extending the Logger with TRACE capabilities
  class ::Logger
    # Numeric severity for TRACE. N.B. TRACE is above other levels, so it is
    # never filtered by #level; it is switched on and off by @trace instead.
    # This is the value the old SEV_LABEL.index('TRACE') produced, but it no
    # longer requires mutating SEV_LABEL, which is frozen on newer Rubies.
    TRACE = 6 unless const_defined?(:TRACE)

    # Send a TRACE-level log line. A no-op unless @trace has been set, which
    # only Franz::Logger does.
    #
    # @param progname [String, nil] the message (or program name when a block
    #   supplies the message), as for Logger#debug and friends
    def trace progname=nil, &block
      add TRACE, nil, progname, &block if @trace
    end
  end

  # A powerful, colorful logger for Franz.
  class Logger < ::Logger
    # Maps each log level to a unique combination of fore- and background colors
    SEVERITY_COLORS = {
      'DEBUG' => [ :blue,    :default ],
      'INFO'  => [ :green,   :default ],
      'WARN'  => [ :yellow,  :default ],
      'ERROR' => [ :red,     :default ],
      'FATAL' => [ :red,     :black   ],
      'TRACE' => [ :magenta, :default ]
    }.freeze

    # Colors used for any severity label missing from SEVERITY_COLORS (e.g.
    # the 'ANY' label used by Logger#unknown).
    FALLBACK_COLORS = [ :default, :default ].freeze

    # Create a new, colorful logger.
    #
    # @param debug [Boolean] enable DEBUG level logs
    # @param trace [Boolean] enable TRACE level logs
    # @param out [File] output destination for logs ($stdout when nil)
    def initialize debug=false, trace=false, out=nil
      out ||= $stdout
      super out
      colorize
      @trace = true if trace
      self.level = ::Logger::INFO
      self.level = ::Logger::DEBUG if debug
    end

  private
    # Label TRACE lines as 'TRACE'; defer to the stock labels otherwise.
    def format_severity severity
      severity == TRACE ? 'TRACE' : super
    end

    # Install the colorizing formatter. At INFO level only the message is
    # printed; at any other level the line also carries the severity, a
    # microsecond timestamp and a caller location.
    def colorize
      self.formatter = proc do |severity, datetime, _, message|
        fore, back = SEVERITY_COLORS.fetch(severity.to_s, FALLBACK_COLORS)
        if level == ::Logger::INFO
          message.to_s.colorize(color: fore, background: back) + "\n"
        else
          "%s [%s] %s -- %s\n".colorize(color: fore, background: back) % [
            severity,
            datetime.iso8601(6),
            # The frame may be absent on a shallow stack; print nothing then.
            File::basename(caller[4].to_s),
            message
          ]
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
# Franz ships line-oriented log files to RabbitMQ. Think barebones logstash in
# pure Ruby with more modest compute and memory requirements.
module Franz

  # Release version, read from the VERSION file two directories above this
  # one so the build pipeline has a single place to bump it
  VERSION = File.read(File.join(File.dirname(__FILE__), *%w[.. .. VERSION])).strip

  # One-line description of what Franz does
  SUMMARY = 'Aggregate log file events and send them elsewhere'

  # Project author
  AUTHOR = 'Sean Clemmer'

  # Contact address for the author
  EMAIL = 'sclemmer@bluejeans.com'

  # ASCII-art banner with the version substituted for the trailing %s
  ART = format(<<-'EOART', VERSION)

.--.,
,--.' \ __ ,-. ,---, ,----,
| | /\/,' ,'/ /| ,-+-. / | .' .`|
: : : ' | |' | ,--.--. ,--.'|' | .' .' .'
: | |-,| | ,'/ \ | | ,"' |,---, ' ./
| : :/|' : / .--. .-. | | | / | |; | .' /
| | .'| | ' \__\/: . . | | | | |`---' / ;--,
' : ' ; : | ," .--.; | | | | |/ / / / .`|
| | | | , ; / / ,. | | | |--' ./__; .'
| : \ ---' ; : .' \| |/ ; | .'
| |,' | , .-./'---' `---'
`--' `--`---' v%s
  EOART
end
@@ -0,0 +1,81 @@
1
+ require 'json'
2
+
3
+ require 'bunny'
4
+ require 'deep_merge'
5
+
6
module Franz

  # RabbitMQ output for Franz. You must declare an x-consistent-hash type
  # exchange, as we generate random Integers for routing keys.
  class Output

    # Start a new output in the background. We'll consume from the input queue
    # and ship events to the configured RabbitMQ cluster.
    #
    # @param [Hash] opts options for the output
    # @option opts [Queue] :input ([]) "input" queue
    # @option opts [Hash] :output ({}) "output" configuration; :exchange holds
    #   the exchange :name plus declaration options, :connection is handed to
    #   Bunny.new
    # @option opts [Array] :tags ([]) when non-empty, set as :tags on every event
    # @option opts [Boolean] :foreground (nil) join the publishing thread
    #   before returning from the constructor
    # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
    def initialize opts={}
      opts = {
        logger: Logger.new(STDOUT),
        tags: [],
        input: [],
        output: {
          exchange: {
            name: 'test',
            durable: true
          },
          connection: {
            host: 'localhost',
            port: 5672
          }
        }
      }.deep_merge!(opts)

      @logger = opts[:logger]

      # Kept on the instance so #stop can close the connection.
      @rabbit = Bunny.new opts[:output][:connection]
      @rabbit.start

      channel  = @rabbit.create_channel
      name     = opts[:output][:exchange].delete(:name)
      exchange = channel.exchange name, \
        opts[:output][:exchange].merge(type: 'x-consistent-hash')

      @stop = false
      @foreground = opts[:foreground]

      @thread = Thread.new do
        rng = Random.new
        until @stop
          event = opts[:input].shift
          event[:tags] = opts[:tags] unless opts[:tags].empty?
          log.trace 'publishing event=%s' % event.inspect
          # The routing key is random so the consistent-hash exchange spreads
          # events across its bound queues.
          exchange.publish \
            JSON::generate(event),
            routing_key: rng.rand(1_000_000),
            persistent: false
        end
      end

      @thread.join if @foreground
    end

    # Join the Output thread. Effectively only once.
    def join
      return if @foreground
      @foreground = true
      @thread.join
    end

    # Stop the Output thread and close the RabbitMQ connection. Effectively
    # only once.
    #
    # @return [Thread, nil] the killed thread, or nil if already joined/stopped
    def stop
      return if @foreground
      @foreground = true
      @stop = true
      killed = @thread.kill
      begin
        @rabbit.close if @rabbit.open?
      rescue StandardError => e
        # Best effort: a broken connection should not prevent shutdown.
        log.warn 'failed to close connection: %s' % e.inspect
      end
      killed
    end

  private
    def log ; @logger end
  end
end