franz 1.2.7 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 56734aa5910e2ab4f20bca8e21ae5cc76bbcfb9d
4
- data.tar.gz: 7bb3ea267a09bdeb68b1e0320daacd04c760890a
3
+ metadata.gz: 9f4c78fdbc67c94815f54786007c9762f1fdfe06
4
+ data.tar.gz: ed630055c0636262bae059d8574b4eb66f5cdb0c
5
5
  SHA512:
6
- metadata.gz: 58ceaffc0dc31e5fc7ea08c77d38c32eb7ace575f000182bb629d0c69888b4e7539e50e83d8a08cbb84e6d15b68b1cd5a03e55ed75f89ed0f35a225cdae79444
7
- data.tar.gz: 80c24a9410ea0e4404541f4eecbe2a87becec61fe193758f346a1699d7db145eeba80fde2e79a598df7427421d3ed995a7ad54b77c9a9628bef7146d8a3358a1
6
+ metadata.gz: 317124bed5cc429c1380d0f46fe97fffdb77e521cf9c9d501fcc1c6d1e5d3ed32c7eaa1b5384c6a818c0373c66bdbda7477f381b0daf8eb4e0845d1bc9c453a9
7
+ data.tar.gz: 7162d593cac7d9b60663ff608f2c278ab0657f4bdd2ad14f98ce12f1237a977c6f9836659500cec43c35dd9c4be76b1eb27f9e81c038a81a8eb7b009cff5f77f
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.2.7
1
+ 1.3.0
data/bin/franz CHANGED
@@ -46,7 +46,7 @@ logger = Franz::Logger.new opts[:debug], opts[:trace], opts[:log]
46
46
 
47
47
  io_bound = config[:output][:bound] || 10_000
48
48
 
49
- begin
49
+ # begin
50
50
  io = SizedQueue.new io_bound
51
51
 
52
52
  # Now we'll connect to our output, RabbitMQ. This creates a new thread in the
@@ -69,13 +69,13 @@ begin
69
69
  # so we'll have to wait here in main or else we'll just exit.
70
70
  fout.join
71
71
 
72
- rescue SignalException => e
73
- logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
74
- rescue SystemExit, Interrupt => e
75
- logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
76
- ensure
77
- logger.info 'Draining. This may take a while...'
78
- fin.stop
79
- fin.checkpoint
80
- logger.info 'Bye!'
81
- end
72
+ # rescue SignalException => e
73
+ # logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
74
+ # rescue SystemExit, Interrupt => e
75
+ # logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
76
+ # ensure
77
+ # logger.info 'Draining. This may take a while...'
78
+ # fin.stop
79
+ # fin.checkpoint
80
+ # logger.info 'Bye!'
81
+ # end
data/lib/franz/agg.rb CHANGED
@@ -25,9 +25,10 @@ module Franz
25
25
  # @option opts [Hash<Path,Fixnum>] :seqs ({}) internal "seqs" state
26
26
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
27
27
  def initialize opts={}
28
- @configs = opts[:configs] || Array.new
29
- @tail_events = opts[:tail_events] || []
30
- @agg_events = opts[:agg_events] || []
28
+ @configs = opts[:configs] || []
29
+ @tail_events = opts[:tail_events] || []
30
+ @agg_events = opts[:agg_events] || []
31
+
31
32
  @flush_interval = opts[:flush_interval] || 10
32
33
  @seqs = opts[:seqs] || Hash.new
33
34
  @logger = opts[:logger] || Logger.new(STDOUT)
@@ -37,21 +38,20 @@ module Franz
37
38
  @buffer = Franz::Sash.new
38
39
  @stop = false
39
40
 
41
+ log.debug 'agg: configs=%s tail_events=%s agg_events=%s' % [
42
+ @configs, @tail_events, @agg_events
43
+ ]
44
+
40
45
  @t1 = Thread.new do
41
- log.debug 'starting agg-flush'
42
46
  until @stop
43
47
  flush
44
48
  sleep flush_interval
45
49
  end
46
- sleep flush_interval
47
- flush
50
+ flush true
48
51
  end
49
52
 
50
53
  @t2 = Thread.new do
51
- log.debug 'starting agg-capture'
52
- until @stop
53
- capture
54
- end
54
+ capture until @stop
55
55
  end
56
56
 
57
57
  log.debug 'started agg'
@@ -141,11 +141,11 @@ module Franz
141
141
  end
142
142
  end
143
143
 
144
- def flush
144
+ def flush force=false
145
145
  lock.synchronize do
146
146
  started = Time.now
147
147
  buffer.keys.each do |path|
148
- if started - buffer.mtime(path) >= flush_interval
148
+ if started - buffer.mtime(path) >= flush_interval || force
149
149
  log.trace 'flushing path=%s' % path.inspect
150
150
  buffered = buffer.remove(path)
151
151
  lines = buffered.map { |e| e[:line] }.join("\n")
@@ -16,9 +16,10 @@ class Franz::Discover
16
16
  # @option opts [Array<Path>] :known ([]) internal "known" state
17
17
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
18
18
  def initialize opts={}
19
- @configs = opts[:configs] || []
20
- @discoveries = opts[:discoveries] || []
21
- @deletions = opts[:deletions] || []
19
+ @configs = opts[:configs] || []
20
+ @discoveries = opts[:discoveries] || []
21
+ @deletions = opts[:deletions] || []
22
+
22
23
  @discover_interval = opts[:discover_interval] || 30
23
24
  @ignore_before = opts[:ignore_before] || 0
24
25
  @known = opts[:known] || []
@@ -32,8 +33,11 @@ class Franz::Discover
32
33
 
33
34
  @stop = false
34
35
 
36
+ log.debug 'discover: configs=%s discoveries=%s deletions=%s' % [
37
+ @configs, @discoveries, @deletions
38
+ ]
39
+
35
40
  @thread = Thread.new do
36
- log.debug 'starting dicover-delete'
37
41
  until @stop
38
42
  until deletions.empty?
39
43
  d = deletions.pop
@@ -58,7 +62,7 @@ class Franz::Discover
58
62
  def stop
59
63
  return state if @stop
60
64
  @stop = true
61
- @thread.join
65
+ @thread.kill
62
66
  log.debug 'stopped discover'
63
67
  return state
64
68
  end
@@ -92,17 +96,18 @@ private
92
96
  end
93
97
 
94
98
  def expand glob
95
- dir_glob = File.dirname(glob)
96
- file_glob = File.basename(glob)
97
- files = []
98
- Dir.glob(dir_glob).each do |dir|
99
- next unless File::directory?(dir)
100
- Dir.foreach(dir) do |fname|
101
- next if fname == '.' || fname == '..'
102
- next unless File.fnmatch?(file_glob, fname)
103
- files << File.join(dir, fname)
104
- end
105
- end
106
- files
99
+ Dir[glob]
100
+ # dir_glob = File.dirname(glob)
101
+ # file_glob = File.basename(glob)
102
+ # files = []
103
+ # Dir.glob(dir_glob).each do |dir|
104
+ # next unless File::directory?(dir)
105
+ # Dir.foreach(dir) do |fname|
106
+ # next if fname == '.' || fname == '..'
107
+ # next unless File.fnmatch?(file_glob, fname)
108
+ # files << File.join(dir, fname)
109
+ # end
110
+ # end
111
+ # files
107
112
  end
108
113
  end
data/lib/franz/input.rb CHANGED
@@ -5,7 +5,6 @@ require 'deep_merge'
5
5
 
6
6
  require_relative 'agg'
7
7
  require_relative 'tail'
8
- require_relative 'tail_pool'
9
8
  require_relative 'watch'
10
9
  require_relative 'discover'
11
10
 
@@ -31,7 +30,6 @@ module Franz
31
30
  output: [],
32
31
  input: {
33
32
  ignore_before: 0,
34
- tail_pool_size: 10,
35
33
  discover_bound: 10_000,
36
34
  watch_bound: 1_000,
37
35
  tail_bound: 1_000,
@@ -49,6 +47,8 @@ module Franz
49
47
  @checkpoint_path = opts[:checkpoint].sub('*', '%d')
50
48
  @checkpoint_glob = opts[:checkpoint]
51
49
 
50
+ log.debug 'input: opts=%s' % JSON::pretty_generate(opts)
51
+
52
52
  # The checkpoint contains a Marshalled Hash with a compact representation of
53
53
  # stateful inputs to various Franz streaming classes (e.g. the "known" option
54
54
  # to Franz::Discover). This state file is generated automatically every time
@@ -162,10 +162,14 @@ module Franz
162
162
  def checkpoint
163
163
  old_checkpoints = Dir[@checkpoint_glob].sort_by { |p| File.mtime p }
164
164
  path = @checkpoint_path % Time.now
165
- File.open(path, 'w') { |f| f.write Marshal.dump(state) }
166
- old_checkpoints.pop # Keep last two checkpoints
167
- old_checkpoints.map { |c| FileUtils.rm c }
168
- log.info 'Wrote %s' % path.inspect
165
+ begin
166
+ File.open(path, 'w') { |f| f.write Marshal.dump(state) }
167
+ old_checkpoints.pop # Keep last two checkpoints
168
+ old_checkpoints.map { |c| FileUtils.rm c }
169
+ log.info 'Wrote %s' % path.inspect
170
+ rescue Errno::EMFILE
171
+ log.warn 'Could not write checkpoint (too many open files)'
172
+ end
169
173
  end
170
174
 
171
175
  private
data/lib/franz/logger.rb CHANGED
@@ -33,32 +33,49 @@ module Franz
33
33
  # @param debug [Boolean] enable DEBUG level logs
34
34
  # @param out [File] output destination for logs
35
35
  def initialize debug=false, trace=false, out=nil
36
+ colorize = out.nil?
36
37
  out ||= $stdout
37
38
  super out
38
- colorize
39
+ format colorize
39
40
  @trace = true if trace
40
41
  self.level = ::Logger::INFO
41
42
  self.level = ::Logger::DEBUG if debug
42
43
  end
43
44
 
44
45
  private
45
- def colorize
46
+ def format colorize
47
+ short_format = "%s\n"
48
+ long_format = "%s [%s] %s -- %s\n"
49
+
46
50
  self.formatter = proc do |severity, datetime, _, message|
47
- if level == 1
48
- message.to_s.colorize(
49
- color: SEVERITY_COLORS[severity.to_s][0],
50
- background: SEVERITY_COLORS[severity.to_s][1]
51
- ) + "\n"
52
- else
53
- "%s [%s] %s -- %s\n".colorize(
54
- color: SEVERITY_COLORS[severity.to_s][0],
55
- background: SEVERITY_COLORS[severity.to_s][1]
56
- ) % [
57
- severity,
58
- datetime.iso8601(6),
59
- File::basename(caller[4]),
60
- message
61
- ]
51
+ if colorize
52
+ if level == 1
53
+ short_format.colorize(
54
+ color: SEVERITY_COLORS[severity.to_s][0],
55
+ background: SEVERITY_COLORS[severity.to_s][1]
56
+ ) % message
57
+ else
58
+ long_format.colorize(
59
+ color: SEVERITY_COLORS[severity.to_s][0],
60
+ background: SEVERITY_COLORS[severity.to_s][1]
61
+ ) % [
62
+ severity,
63
+ datetime.iso8601(6),
64
+ File::basename(caller[4]),
65
+ message
66
+ ]
67
+ end
68
+ else # plain
69
+ if level == 1
70
+ short_format % message
71
+ else
72
+ long_format % [
73
+ severity,
74
+ datetime.iso8601(6),
75
+ File::basename(caller[4]),
76
+ message
77
+ ]
78
+ end
62
79
  end
63
80
  end
64
81
  end
data/lib/franz/output.rb CHANGED
@@ -34,7 +34,11 @@ module Franz
34
34
 
35
35
  @logger = opts[:logger]
36
36
 
37
- rabbit = Bunny.new opts[:output][:connection]
37
+ rabbit = Bunny.new opts[:output][:connection].merge \
38
+ automatically_recover: true,
39
+ threaded: true,
40
+ heartbeat: 90
41
+
38
42
  rabbit.start
39
43
 
40
44
  channel = rabbit.create_channel
data/lib/franz/tail.rb CHANGED
@@ -14,14 +14,19 @@ module Franz
14
14
  #
15
15
  # @param opts [Hash] a complex Hash for tail configuration
16
16
  def initialize opts={}
17
- @watch_events = opts[:watch_events] || []
18
- @tail_events = opts[:tail_events] || []
17
+ @watch_events = opts[:watch_events] || []
18
+ @tail_events = opts[:tail_events] || []
19
+
19
20
  @eviction_interval = opts[:eviction_interval] || 60
20
21
  @block_size = opts[:block_size] || 32_768 # 32 KiB
21
22
  @spread_size = opts[:spread_size] || 98_304 # 96 KiB
22
23
  @cursors = opts[:cursors] || Hash.new
23
24
  @logger = opts[:logger] || Logger.new(STDOUT)
24
25
 
26
+ log.debug 'tail: watch_events=%s tail_events=%s' % [
27
+ @watch_events, @tail_events
28
+ ]
29
+
25
30
  @buffer = Hash.new { |h, k| h[k] = BufferedTokenizer.new }
26
31
  @file = Hash.new
27
32
  @changed = Hash.new
@@ -29,44 +34,21 @@ module Franz
29
34
  @stop = false
30
35
 
31
36
  @evict_thread = Thread.new do
32
- log.debug 'starting tail-evict'
33
37
  until @stop
34
38
  evict
35
39
  sleep eviction_interval
36
40
  end
37
- sleep eviction_interval
38
- evict
39
- end
40
-
41
- @backlog = Hash.new { |h, k| h[k] = Array.new }
42
- @incoming = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }
43
-
44
- @watch_thread = Thread.new do
45
- log.debug 'starting tail-watch'
46
- until @stop
47
- e = watch_events.shift
48
- @incoming[e[:path]].push e
49
- end
41
+ evict true
50
42
  end
51
43
 
52
44
  @tail_thread = Thread.new do
53
45
  until @stop
54
- had_event = false
55
-
56
- paths = (@backlog.keys + @incoming.keys).uniq.shuffle
57
-
58
- paths.each do |path|
59
- event = @backlog[path].shift
60
- begin
61
- event = @incoming[path].shift(true)
62
- rescue ThreadError
63
- next
64
- end if event.nil?
65
- had_event = true
66
- handle event
46
+ if @file.size >= OPEN_FILE_LIMIT
47
+ log.debug 'Sleeping until file descriptors become available...'
48
+ sleep 5
49
+ else
50
+ handle(watch_events.shift)
67
51
  end
68
-
69
- sleep 0.05 unless had_event
70
52
  end
71
53
  end
72
54
 
@@ -79,9 +61,9 @@ module Franz
79
61
  def stop
80
62
  return state if @stop
81
63
  @stop = true
82
- @watch_thread.kill
83
- @evict_thread.join
84
- @tail_thread.join
64
+ @watch_thread.kill rescue nil
65
+ @evict_thread.kill rescue nil
66
+ @tail_thread.kill rescue nil
85
67
  log.debug 'stopped tail'
86
68
  return state
87
69
  end
@@ -97,6 +79,11 @@ module Franz
97
79
  def log ; @logger end
98
80
 
99
81
  def open path
82
+ if file.size > OPEN_FILE_LIMIT
83
+ log.fatal 'Absolutely too many open files!'
84
+ raise Errno::EMFILE
85
+ end
86
+
100
87
  return true unless file[path].nil?
101
88
  pos = @cursors.include?(path) ? @cursors[path] : 0
102
89
  begin
@@ -104,10 +91,14 @@ module Franz
104
91
  file[path].sysseek pos, IO::SEEK_SET
105
92
  @cursors[path] = pos
106
93
  @changed[path] = Time.now.to_i
94
+ rescue Errno::EMFILE
95
+ log.debug 'skipping: path=%s (too many open files)' % path.inspect
96
+ return false
107
97
  rescue Errno::ENOENT
98
+ log.debug 'skipping: path=%s (file does not exist)' % path.inspect
108
99
  return false
109
100
  end
110
- log.debug 'opened: path=%s' % path.inspect
101
+ log.trace 'opened: path=%s' % path.inspect
111
102
  return true
112
103
  end
113
104
 
@@ -123,11 +114,6 @@ module Franz
123
114
  break if file[path].pos >= size
124
115
  end
125
116
 
126
- if bytes_read >= @spread_size
127
- @backlog[path].push name: :appended, path: path, size: size
128
- break
129
- end
130
-
131
117
  begin
132
118
  data = file[path].sysread @block_size
133
119
  buffer[path].extract(data).each do |line|
@@ -143,7 +129,10 @@ module Franz
143
129
  bytes_read += @cursors[path] - last_pos
144
130
  end
145
131
 
146
- log.trace 'read: path=%s size=%s' % [ path.inspect, size.inspect ]
132
+ log.trace 'read: path=%s size=%s cursor=%s' % [
133
+ path.inspect, size.inspect, @cursors[path].inspect
134
+ ]
135
+
147
136
  @changed[path] = Time.now.to_i
148
137
  @reading.delete path
149
138
  end
@@ -157,13 +146,14 @@ module Franz
157
146
  log.debug 'closed: path=%s' % path.inspect
158
147
  end
159
148
 
160
- def evict
149
+ def evict force=false
150
+ cutoff = Time.now.to_i - eviction_interval
161
151
  file.keys.each do |path|
162
- next if @reading[path]
163
- next unless @changed[path] < Time.now.to_i - eviction_interval
164
- next unless file.include? path
165
- next unless @incoming[path].empty?
166
- next unless @backlog[path].empty?
152
+ unless force
153
+ next if @reading[path]
154
+ next unless @changed[path] < cutoff
155
+ next unless file.include? path
156
+ end
167
157
  file.delete(path).close
168
158
  log.debug 'evicted: path=%s' % path.inspect
169
159
  end
data/lib/franz/watch.rb CHANGED
@@ -17,9 +17,10 @@ module Franz
17
17
  # @option opts [Hash<Path,State>] :stats ([]) internal "stats" state
18
18
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
19
19
  def initialize opts={}
20
- @discoveries = opts[:discoveries] || []
21
- @deletions = opts[:deletions] || []
22
- @watch_events = opts[:watch_events] || []
20
+ @discoveries = opts[:discoveries] || []
21
+ @deletions = opts[:deletions] || []
22
+ @watch_events = opts[:watch_events] || []
23
+
23
24
  @watch_interval = opts[:watch_interval] || 10
24
25
  @stats = opts[:stats] || Hash.new
25
26
  @logger = opts[:logger] || Logger.new(STDOUT)
@@ -31,12 +32,14 @@ module Franz
31
32
 
32
33
  @stop = false
33
34
 
35
+ log.debug 'watch: discoveries=%s deletions=%s watch_events=%s' % [
36
+ @discoveries, @deletions, @watch_events
37
+ ]
38
+
34
39
  @thread = Thread.new do
35
- log.debug 'starting watch-discover'
36
40
  until @stop
37
41
  until discoveries.empty?
38
- d = discoveries.pop
39
- @stats[d] = nil
42
+ @stats[discoveries.shift] = nil
40
43
  end
41
44
  watch.each do |deleted|
42
45
  @stats.delete deleted
@@ -55,7 +58,7 @@ module Franz
55
58
  def stop
56
59
  return state if @stop
57
60
  @stop = true
58
- @thread.join
61
+ @thread.kill
59
62
  log.debug 'stopped watch'
60
63
  return state
61
64
  end
data/lib/franz.rb CHANGED
@@ -1,3 +1,6 @@
1
+ open_file_limit = `ulimit -n`.strip.to_i - 256
2
+ OPEN_FILE_LIMIT = open_file_limit <= 0 ? 256 : open_file_limit
3
+
1
4
  require_relative 'franz/agg'
2
5
  require_relative 'franz/config'
3
6
  require_relative 'franz/discover'
@@ -6,5 +9,4 @@ require_relative 'franz/logger'
6
9
  require_relative 'franz/metadata'
7
10
  require_relative 'franz/output'
8
11
  require_relative 'franz/tail'
9
- require_relative 'franz/tail_pool'
10
12
  require_relative 'franz/watch'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: franz
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.7
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sean Clemmer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-30 00:00:00.000000000 Z
11
+ date: 2014-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bunny
@@ -119,7 +119,6 @@ files:
119
119
  - lib/franz/output.rb
120
120
  - lib/franz/sash.rb
121
121
  - lib/franz/tail.rb
122
- - lib/franz/tail_pool.rb
123
122
  - lib/franz/watch.rb
124
123
  - test/test_franz_agg.rb
125
124
  - test/test_franz_discover.rb
@@ -1,68 +0,0 @@
1
- require 'thread'
2
- require 'logger'
3
-
4
- require 'consistent_hashing'
5
-
6
- module Franz
7
-
8
- # TailPool creates a consistenly-hashed pool of Tails.
9
- class TailPool
10
- # Start a new TailPool thread in the background.
11
- #
12
- # @param opts [Hash] a complex Hash for configuration
13
- def initialize opts={}
14
- @size = opts[:size] || 5
15
- @watch_events = opts[:watch_events] || []
16
- @tail_events = opts[:tail_events] || []
17
- @size = opts[:size] || 5
18
- @logger = opts[:logger] || Logger.new(STDOUT)
19
-
20
- @tails = []
21
- @ring = ConsistentHashing::Ring.new
22
- @events = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }
23
-
24
- @size.times do |i|
25
- log.debug 'starting tail_pool-tail #%d' % i
26
- @ring << @events[i]
27
- @tails << Franz::Tail.new(opts.merge({
28
- watch_events: @events[i],
29
- tail_events: @tail_events
30
- }))
31
- end
32
-
33
- @stop = false
34
-
35
- @in_thread = Thread.new do
36
- log.debug 'starting tail_pool-watch'
37
- until @stop
38
- e = @watch_events.shift
39
- q = @ring.node_for e[:path]
40
- q.push e
41
- end
42
- end
43
-
44
- log.debug 'started tail_pool'
45
- end
46
-
47
- # Stop the TailPool thread. Effectively only once.
48
- #
49
- # @return [Hash] internal "cursors" state
50
- def stop
51
- return state if @stop
52
- @stop = true
53
- @tails.map(&:stop)
54
- log.debug 'stopped tail_pool'
55
- return state
56
- end
57
-
58
- # Return the internal "cursors" state
59
- def state
60
- @tails.map(&:state).reduce(&:merge)
61
- end
62
-
63
- private
64
- attr_reader :watch_events, :tail_events, :size
65
-
66
- def log ; @logger end
67
- end
68
- end