franz 1.2.7 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 56734aa5910e2ab4f20bca8e21ae5cc76bbcfb9d
4
- data.tar.gz: 7bb3ea267a09bdeb68b1e0320daacd04c760890a
3
+ metadata.gz: 9f4c78fdbc67c94815f54786007c9762f1fdfe06
4
+ data.tar.gz: ed630055c0636262bae059d8574b4eb66f5cdb0c
5
5
  SHA512:
6
- metadata.gz: 58ceaffc0dc31e5fc7ea08c77d38c32eb7ace575f000182bb629d0c69888b4e7539e50e83d8a08cbb84e6d15b68b1cd5a03e55ed75f89ed0f35a225cdae79444
7
- data.tar.gz: 80c24a9410ea0e4404541f4eecbe2a87becec61fe193758f346a1699d7db145eeba80fde2e79a598df7427421d3ed995a7ad54b77c9a9628bef7146d8a3358a1
6
+ metadata.gz: 317124bed5cc429c1380d0f46fe97fffdb77e521cf9c9d501fcc1c6d1e5d3ed32c7eaa1b5384c6a818c0373c66bdbda7477f381b0daf8eb4e0845d1bc9c453a9
7
+ data.tar.gz: 7162d593cac7d9b60663ff608f2c278ab0657f4bdd2ad14f98ce12f1237a977c6f9836659500cec43c35dd9c4be76b1eb27f9e81c038a81a8eb7b009cff5f77f
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.2.7
1
+ 1.3.0
data/bin/franz CHANGED
@@ -46,7 +46,7 @@ logger = Franz::Logger.new opts[:debug], opts[:trace], opts[:log]
46
46
 
47
47
  io_bound = config[:output][:bound] || 10_000
48
48
 
49
- begin
49
+ # begin
50
50
  io = SizedQueue.new io_bound
51
51
 
52
52
  # Now we'll connect to our output, RabbitMQ. This creates a new thread in the
@@ -69,13 +69,13 @@ begin
69
69
  # so we'll have to wait here in main or else we'll just exit.
70
70
  fout.join
71
71
 
72
- rescue SignalException => e
73
- logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
74
- rescue SystemExit, Interrupt => e
75
- logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
76
- ensure
77
- logger.info 'Draining. This may take a while...'
78
- fin.stop
79
- fin.checkpoint
80
- logger.info 'Bye!'
81
- end
72
+ # rescue SignalException => e
73
+ # logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
74
+ # rescue SystemExit, Interrupt => e
75
+ # logger.fatal "#{e.inspect} #{$!}\n\t#{$@ * "\n\t"}"
76
+ # ensure
77
+ # logger.info 'Draining. This may take a while...'
78
+ # fin.stop
79
+ # fin.checkpoint
80
+ # logger.info 'Bye!'
81
+ # end
data/lib/franz/agg.rb CHANGED
@@ -25,9 +25,10 @@ module Franz
25
25
  # @option opts [Hash<Path,Fixnum>] :seqs ({}) internal "seqs" state
26
26
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
27
27
  def initialize opts={}
28
- @configs = opts[:configs] || Array.new
29
- @tail_events = opts[:tail_events] || []
30
- @agg_events = opts[:agg_events] || []
28
+ @configs = opts[:configs] || []
29
+ @tail_events = opts[:tail_events] || []
30
+ @agg_events = opts[:agg_events] || []
31
+
31
32
  @flush_interval = opts[:flush_interval] || 10
32
33
  @seqs = opts[:seqs] || Hash.new
33
34
  @logger = opts[:logger] || Logger.new(STDOUT)
@@ -37,21 +38,20 @@ module Franz
37
38
  @buffer = Franz::Sash.new
38
39
  @stop = false
39
40
 
41
+ log.debug 'agg: configs=%s tail_events=%s agg_events=%s' % [
42
+ @configs, @tail_events, @agg_events
43
+ ]
44
+
40
45
  @t1 = Thread.new do
41
- log.debug 'starting agg-flush'
42
46
  until @stop
43
47
  flush
44
48
  sleep flush_interval
45
49
  end
46
- sleep flush_interval
47
- flush
50
+ flush true
48
51
  end
49
52
 
50
53
  @t2 = Thread.new do
51
- log.debug 'starting agg-capture'
52
- until @stop
53
- capture
54
- end
54
+ capture until @stop
55
55
  end
56
56
 
57
57
  log.debug 'started agg'
@@ -141,11 +141,11 @@ module Franz
141
141
  end
142
142
  end
143
143
 
144
- def flush
144
+ def flush force=false
145
145
  lock.synchronize do
146
146
  started = Time.now
147
147
  buffer.keys.each do |path|
148
- if started - buffer.mtime(path) >= flush_interval
148
+ if started - buffer.mtime(path) >= flush_interval || force
149
149
  log.trace 'flushing path=%s' % path.inspect
150
150
  buffered = buffer.remove(path)
151
151
  lines = buffered.map { |e| e[:line] }.join("\n")
@@ -16,9 +16,10 @@ class Franz::Discover
16
16
  # @option opts [Array<Path>] :known ([]) internal "known" state
17
17
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
18
18
  def initialize opts={}
19
- @configs = opts[:configs] || []
20
- @discoveries = opts[:discoveries] || []
21
- @deletions = opts[:deletions] || []
19
+ @configs = opts[:configs] || []
20
+ @discoveries = opts[:discoveries] || []
21
+ @deletions = opts[:deletions] || []
22
+
22
23
  @discover_interval = opts[:discover_interval] || 30
23
24
  @ignore_before = opts[:ignore_before] || 0
24
25
  @known = opts[:known] || []
@@ -32,8 +33,11 @@ class Franz::Discover
32
33
 
33
34
  @stop = false
34
35
 
36
+ log.debug 'discover: configs=%s discoveries=%s deletions=%s' % [
37
+ @configs, @discoveries, @deletions
38
+ ]
39
+
35
40
  @thread = Thread.new do
36
- log.debug 'starting dicover-delete'
37
41
  until @stop
38
42
  until deletions.empty?
39
43
  d = deletions.pop
@@ -58,7 +62,7 @@ class Franz::Discover
58
62
  def stop
59
63
  return state if @stop
60
64
  @stop = true
61
- @thread.join
65
+ @thread.kill
62
66
  log.debug 'stopped discover'
63
67
  return state
64
68
  end
@@ -92,17 +96,18 @@ private
92
96
  end
93
97
 
94
98
  def expand glob
95
- dir_glob = File.dirname(glob)
96
- file_glob = File.basename(glob)
97
- files = []
98
- Dir.glob(dir_glob).each do |dir|
99
- next unless File::directory?(dir)
100
- Dir.foreach(dir) do |fname|
101
- next if fname == '.' || fname == '..'
102
- next unless File.fnmatch?(file_glob, fname)
103
- files << File.join(dir, fname)
104
- end
105
- end
106
- files
99
+ Dir[glob]
100
+ # dir_glob = File.dirname(glob)
101
+ # file_glob = File.basename(glob)
102
+ # files = []
103
+ # Dir.glob(dir_glob).each do |dir|
104
+ # next unless File::directory?(dir)
105
+ # Dir.foreach(dir) do |fname|
106
+ # next if fname == '.' || fname == '..'
107
+ # next unless File.fnmatch?(file_glob, fname)
108
+ # files << File.join(dir, fname)
109
+ # end
110
+ # end
111
+ # files
107
112
  end
108
113
  end
data/lib/franz/input.rb CHANGED
@@ -5,7 +5,6 @@ require 'deep_merge'
5
5
 
6
6
  require_relative 'agg'
7
7
  require_relative 'tail'
8
- require_relative 'tail_pool'
9
8
  require_relative 'watch'
10
9
  require_relative 'discover'
11
10
 
@@ -31,7 +30,6 @@ module Franz
31
30
  output: [],
32
31
  input: {
33
32
  ignore_before: 0,
34
- tail_pool_size: 10,
35
33
  discover_bound: 10_000,
36
34
  watch_bound: 1_000,
37
35
  tail_bound: 1_000,
@@ -49,6 +47,8 @@ module Franz
49
47
  @checkpoint_path = opts[:checkpoint].sub('*', '%d')
50
48
  @checkpoint_glob = opts[:checkpoint]
51
49
 
50
+ log.debug 'input: opts=%s' % JSON::pretty_generate(opts)
51
+
52
52
  # The checkpoint contains a Marshalled Hash with a compact representation of
53
53
  # stateful inputs to various Franz streaming classes (e.g. the "known" option
54
54
  # to Franz::Discover). This state file is generated automatically every time
@@ -162,10 +162,14 @@ module Franz
162
162
  def checkpoint
163
163
  old_checkpoints = Dir[@checkpoint_glob].sort_by { |p| File.mtime p }
164
164
  path = @checkpoint_path % Time.now
165
- File.open(path, 'w') { |f| f.write Marshal.dump(state) }
166
- old_checkpoints.pop # Keep last two checkpoints
167
- old_checkpoints.map { |c| FileUtils.rm c }
168
- log.info 'Wrote %s' % path.inspect
165
+ begin
166
+ File.open(path, 'w') { |f| f.write Marshal.dump(state) }
167
+ old_checkpoints.pop # Keep last two checkpoints
168
+ old_checkpoints.map { |c| FileUtils.rm c }
169
+ log.info 'Wrote %s' % path.inspect
170
+ rescue Errno::EMFILE
171
+ log.warn 'Could not write checkpoint (too many open files)'
172
+ end
169
173
  end
170
174
 
171
175
  private
data/lib/franz/logger.rb CHANGED
@@ -33,32 +33,49 @@ module Franz
33
33
  # @param debug [Boolean] enable DEBUG level logs
34
34
  # @param out [File] output destination for logs
35
35
  def initialize debug=false, trace=false, out=nil
36
+ colorize = out.nil?
36
37
  out ||= $stdout
37
38
  super out
38
- colorize
39
+ format colorize
39
40
  @trace = true if trace
40
41
  self.level = ::Logger::INFO
41
42
  self.level = ::Logger::DEBUG if debug
42
43
  end
43
44
 
44
45
  private
45
- def colorize
46
+ def format colorize
47
+ short_format = "%s\n"
48
+ long_format = "%s [%s] %s -- %s\n"
49
+
46
50
  self.formatter = proc do |severity, datetime, _, message|
47
- if level == 1
48
- message.to_s.colorize(
49
- color: SEVERITY_COLORS[severity.to_s][0],
50
- background: SEVERITY_COLORS[severity.to_s][1]
51
- ) + "\n"
52
- else
53
- "%s [%s] %s -- %s\n".colorize(
54
- color: SEVERITY_COLORS[severity.to_s][0],
55
- background: SEVERITY_COLORS[severity.to_s][1]
56
- ) % [
57
- severity,
58
- datetime.iso8601(6),
59
- File::basename(caller[4]),
60
- message
61
- ]
51
+ if colorize
52
+ if level == 1
53
+ short_format.colorize(
54
+ color: SEVERITY_COLORS[severity.to_s][0],
55
+ background: SEVERITY_COLORS[severity.to_s][1]
56
+ ) % message
57
+ else
58
+ long_format.colorize(
59
+ color: SEVERITY_COLORS[severity.to_s][0],
60
+ background: SEVERITY_COLORS[severity.to_s][1]
61
+ ) % [
62
+ severity,
63
+ datetime.iso8601(6),
64
+ File::basename(caller[4]),
65
+ message
66
+ ]
67
+ end
68
+ else # plain
69
+ if level == 1
70
+ short_format % message
71
+ else
72
+ long_format % [
73
+ severity,
74
+ datetime.iso8601(6),
75
+ File::basename(caller[4]),
76
+ message
77
+ ]
78
+ end
62
79
  end
63
80
  end
64
81
  end
data/lib/franz/output.rb CHANGED
@@ -34,7 +34,11 @@ module Franz
34
34
 
35
35
  @logger = opts[:logger]
36
36
 
37
- rabbit = Bunny.new opts[:output][:connection]
37
+ rabbit = Bunny.new opts[:output][:connection].merge \
38
+ automatically_recover: true,
39
+ threaded: true,
40
+ heartbeat: 90
41
+
38
42
  rabbit.start
39
43
 
40
44
  channel = rabbit.create_channel
data/lib/franz/tail.rb CHANGED
@@ -14,14 +14,19 @@ module Franz
14
14
  #
15
15
  # @param opts [Hash] a complex Hash for tail configuration
16
16
  def initialize opts={}
17
- @watch_events = opts[:watch_events] || []
18
- @tail_events = opts[:tail_events] || []
17
+ @watch_events = opts[:watch_events] || []
18
+ @tail_events = opts[:tail_events] || []
19
+
19
20
  @eviction_interval = opts[:eviction_interval] || 60
20
21
  @block_size = opts[:block_size] || 32_768 # 32 KiB
21
22
  @spread_size = opts[:spread_size] || 98_304 # 96 KiB
22
23
  @cursors = opts[:cursors] || Hash.new
23
24
  @logger = opts[:logger] || Logger.new(STDOUT)
24
25
 
26
+ log.debug 'tail: watch_events=%s tail_events=%s' % [
27
+ @watch_events, @tail_events
28
+ ]
29
+
25
30
  @buffer = Hash.new { |h, k| h[k] = BufferedTokenizer.new }
26
31
  @file = Hash.new
27
32
  @changed = Hash.new
@@ -29,44 +34,21 @@ module Franz
29
34
  @stop = false
30
35
 
31
36
  @evict_thread = Thread.new do
32
- log.debug 'starting tail-evict'
33
37
  until @stop
34
38
  evict
35
39
  sleep eviction_interval
36
40
  end
37
- sleep eviction_interval
38
- evict
39
- end
40
-
41
- @backlog = Hash.new { |h, k| h[k] = Array.new }
42
- @incoming = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }
43
-
44
- @watch_thread = Thread.new do
45
- log.debug 'starting tail-watch'
46
- until @stop
47
- e = watch_events.shift
48
- @incoming[e[:path]].push e
49
- end
41
+ evict true
50
42
  end
51
43
 
52
44
  @tail_thread = Thread.new do
53
45
  until @stop
54
- had_event = false
55
-
56
- paths = (@backlog.keys + @incoming.keys).uniq.shuffle
57
-
58
- paths.each do |path|
59
- event = @backlog[path].shift
60
- begin
61
- event = @incoming[path].shift(true)
62
- rescue ThreadError
63
- next
64
- end if event.nil?
65
- had_event = true
66
- handle event
46
+ if @file.size >= OPEN_FILE_LIMIT
47
+ log.debug 'Sleeping until file descriptors become available...'
48
+ sleep 5
49
+ else
50
+ handle(watch_events.shift)
67
51
  end
68
-
69
- sleep 0.05 unless had_event
70
52
  end
71
53
  end
72
54
 
@@ -79,9 +61,9 @@ module Franz
79
61
  def stop
80
62
  return state if @stop
81
63
  @stop = true
82
- @watch_thread.kill
83
- @evict_thread.join
84
- @tail_thread.join
64
+ @watch_thread.kill rescue nil
65
+ @evict_thread.kill rescue nil
66
+ @tail_thread.kill rescue nil
85
67
  log.debug 'stopped tail'
86
68
  return state
87
69
  end
@@ -97,6 +79,11 @@ module Franz
97
79
  def log ; @logger end
98
80
 
99
81
  def open path
82
+ if file.size > OPEN_FILE_LIMIT
83
+ log.fatal 'Absolutely too many open files!'
84
+ raise Errno::EMFILE
85
+ end
86
+
100
87
  return true unless file[path].nil?
101
88
  pos = @cursors.include?(path) ? @cursors[path] : 0
102
89
  begin
@@ -104,10 +91,14 @@ module Franz
104
91
  file[path].sysseek pos, IO::SEEK_SET
105
92
  @cursors[path] = pos
106
93
  @changed[path] = Time.now.to_i
94
+ rescue Errno::EMFILE
95
+ log.debug 'skipping: path=%s (too many open files)' % path.inspect
96
+ return false
107
97
  rescue Errno::ENOENT
98
+ log.debug 'skipping: path=%s (file does not exist)' % path.inspect
108
99
  return false
109
100
  end
110
- log.debug 'opened: path=%s' % path.inspect
101
+ log.trace 'opened: path=%s' % path.inspect
111
102
  return true
112
103
  end
113
104
 
@@ -123,11 +114,6 @@ module Franz
123
114
  break if file[path].pos >= size
124
115
  end
125
116
 
126
- if bytes_read >= @spread_size
127
- @backlog[path].push name: :appended, path: path, size: size
128
- break
129
- end
130
-
131
117
  begin
132
118
  data = file[path].sysread @block_size
133
119
  buffer[path].extract(data).each do |line|
@@ -143,7 +129,10 @@ module Franz
143
129
  bytes_read += @cursors[path] - last_pos
144
130
  end
145
131
 
146
- log.trace 'read: path=%s size=%s' % [ path.inspect, size.inspect ]
132
+ log.trace 'read: path=%s size=%s cursor=%s' % [
133
+ path.inspect, size.inspect, @cursors[path].inspect
134
+ ]
135
+
147
136
  @changed[path] = Time.now.to_i
148
137
  @reading.delete path
149
138
  end
@@ -157,13 +146,14 @@ module Franz
157
146
  log.debug 'closed: path=%s' % path.inspect
158
147
  end
159
148
 
160
- def evict
149
+ def evict force=false
150
+ cutoff = Time.now.to_i - eviction_interval
161
151
  file.keys.each do |path|
162
- next if @reading[path]
163
- next unless @changed[path] < Time.now.to_i - eviction_interval
164
- next unless file.include? path
165
- next unless @incoming[path].empty?
166
- next unless @backlog[path].empty?
152
+ unless force
153
+ next if @reading[path]
154
+ next unless @changed[path] < cutoff
155
+ next unless file.include? path
156
+ end
167
157
  file.delete(path).close
168
158
  log.debug 'evicted: path=%s' % path.inspect
169
159
  end
data/lib/franz/watch.rb CHANGED
@@ -17,9 +17,10 @@ module Franz
17
17
  # @option opts [Hash<Path,State>] :stats ([]) internal "stats" state
18
18
  # @option opts [Logger] :logger (Logger.new(STDOUT)) logger to use
19
19
  def initialize opts={}
20
- @discoveries = opts[:discoveries] || []
21
- @deletions = opts[:deletions] || []
22
- @watch_events = opts[:watch_events] || []
20
+ @discoveries = opts[:discoveries] || []
21
+ @deletions = opts[:deletions] || []
22
+ @watch_events = opts[:watch_events] || []
23
+
23
24
  @watch_interval = opts[:watch_interval] || 10
24
25
  @stats = opts[:stats] || Hash.new
25
26
  @logger = opts[:logger] || Logger.new(STDOUT)
@@ -31,12 +32,14 @@ module Franz
31
32
 
32
33
  @stop = false
33
34
 
35
+ log.debug 'watch: discoveries=%s deletions=%s watch_events=%s' % [
36
+ @discoveries, @deletions, @watch_events
37
+ ]
38
+
34
39
  @thread = Thread.new do
35
- log.debug 'starting watch-discover'
36
40
  until @stop
37
41
  until discoveries.empty?
38
- d = discoveries.pop
39
- @stats[d] = nil
42
+ @stats[discoveries.shift] = nil
40
43
  end
41
44
  watch.each do |deleted|
42
45
  @stats.delete deleted
@@ -55,7 +58,7 @@ module Franz
55
58
  def stop
56
59
  return state if @stop
57
60
  @stop = true
58
- @thread.join
61
+ @thread.kill
59
62
  log.debug 'stopped watch'
60
63
  return state
61
64
  end
data/lib/franz.rb CHANGED
@@ -1,3 +1,6 @@
1
+ open_file_limit = `ulimit -n`.strip.to_i - 256
2
+ OPEN_FILE_LIMIT = open_file_limit <= 0 ? 256 : open_file_limit
3
+
1
4
  require_relative 'franz/agg'
2
5
  require_relative 'franz/config'
3
6
  require_relative 'franz/discover'
@@ -6,5 +9,4 @@ require_relative 'franz/logger'
6
9
  require_relative 'franz/metadata'
7
10
  require_relative 'franz/output'
8
11
  require_relative 'franz/tail'
9
- require_relative 'franz/tail_pool'
10
12
  require_relative 'franz/watch'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: franz
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.7
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sean Clemmer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-30 00:00:00.000000000 Z
11
+ date: 2014-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bunny
@@ -119,7 +119,6 @@ files:
119
119
  - lib/franz/output.rb
120
120
  - lib/franz/sash.rb
121
121
  - lib/franz/tail.rb
122
- - lib/franz/tail_pool.rb
123
122
  - lib/franz/watch.rb
124
123
  - test/test_franz_agg.rb
125
124
  - test/test_franz_discover.rb
@@ -1,68 +0,0 @@
1
- require 'thread'
2
- require 'logger'
3
-
4
- require 'consistent_hashing'
5
-
6
- module Franz
7
-
8
- # TailPool creates a consistenly-hashed pool of Tails.
9
- class TailPool
10
- # Start a new TailPool thread in the background.
11
- #
12
- # @param opts [Hash] a complex Hash for configuration
13
- def initialize opts={}
14
- @size = opts[:size] || 5
15
- @watch_events = opts[:watch_events] || []
16
- @tail_events = opts[:tail_events] || []
17
- @size = opts[:size] || 5
18
- @logger = opts[:logger] || Logger.new(STDOUT)
19
-
20
- @tails = []
21
- @ring = ConsistentHashing::Ring.new
22
- @events = Hash.new { |h, k| h[k] = SizedQueue.new 10_000 }
23
-
24
- @size.times do |i|
25
- log.debug 'starting tail_pool-tail #%d' % i
26
- @ring << @events[i]
27
- @tails << Franz::Tail.new(opts.merge({
28
- watch_events: @events[i],
29
- tail_events: @tail_events
30
- }))
31
- end
32
-
33
- @stop = false
34
-
35
- @in_thread = Thread.new do
36
- log.debug 'starting tail_pool-watch'
37
- until @stop
38
- e = @watch_events.shift
39
- q = @ring.node_for e[:path]
40
- q.push e
41
- end
42
- end
43
-
44
- log.debug 'started tail_pool'
45
- end
46
-
47
- # Stop the TailPool thread. Effectively only once.
48
- #
49
- # @return [Hash] internal "cursors" state
50
- def stop
51
- return state if @stop
52
- @stop = true
53
- @tails.map(&:stop)
54
- log.debug 'stopped tail_pool'
55
- return state
56
- end
57
-
58
- # Return the internal "cursors" state
59
- def state
60
- @tails.map(&:state).reduce(&:merge)
61
- end
62
-
63
- private
64
- attr_reader :watch_events, :tail_events, :size
65
-
66
- def log ; @logger end
67
- end
68
- end