fluq 0.7.5 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.travis.yml +3 -0
  4. data/Gemfile +12 -1
  5. data/Gemfile.lock +44 -8
  6. data/README.md +24 -6
  7. data/Rakefile +8 -1
  8. data/benchmark/socket.rb +13 -25
  9. data/examples/config/multi.rb +52 -0
  10. data/examples/config/simple.rb +15 -0
  11. data/fluq.gemspec +3 -3
  12. data/lib/fluq.rb +22 -16
  13. data/lib/fluq/cli.rb +3 -12
  14. data/lib/fluq/dsl.rb +2 -45
  15. data/lib/fluq/dsl/base.rb +11 -0
  16. data/lib/fluq/dsl/feed.rb +24 -0
  17. data/lib/fluq/dsl/root.rb +35 -0
  18. data/lib/fluq/event.rb +9 -28
  19. data/lib/fluq/feed.rb +40 -5
  20. data/lib/fluq/format.rb +6 -0
  21. data/lib/fluq/format/base.rb +42 -0
  22. data/lib/fluq/format/json.rb +17 -0
  23. data/lib/fluq/format/lines.rb +27 -0
  24. data/lib/fluq/format/msgpack.rb +28 -0
  25. data/lib/fluq/format/tsv.rb +19 -0
  26. data/lib/fluq/handler.rb +1 -1
  27. data/lib/fluq/handler/base.rb +11 -38
  28. data/lib/fluq/handler/log.rb +12 -14
  29. data/lib/fluq/handler/noop.rb +2 -0
  30. data/lib/fluq/input/base.rb +33 -29
  31. data/lib/fluq/input/socket.rb +46 -16
  32. data/lib/fluq/mixins.rb +2 -2
  33. data/lib/fluq/runner.rb +41 -0
  34. data/lib/fluq/testing.rb +5 -11
  35. data/lib/fluq/version.rb +1 -1
  36. data/lib/fluq/worker.rb +73 -0
  37. data/spec/fluq/dsl/feed_spec.rb +33 -0
  38. data/spec/fluq/dsl/root_spec.rb +20 -0
  39. data/spec/fluq/event_spec.rb +17 -12
  40. data/spec/fluq/feed_spec.rb +24 -0
  41. data/spec/fluq/format/base_spec.rb +9 -0
  42. data/spec/fluq/format/json_spec.rb +22 -0
  43. data/spec/fluq/format/lines_spec.rb +20 -0
  44. data/spec/fluq/format/msgpack_spec.rb +22 -0
  45. data/spec/fluq/format/tsv_spec.rb +21 -0
  46. data/spec/fluq/handler/base_spec.rb +7 -52
  47. data/spec/fluq/handler/log_spec.rb +11 -14
  48. data/spec/fluq/handler/{null_spec.rb → noop_spec.rb} +1 -3
  49. data/spec/fluq/input/base_spec.rb +48 -15
  50. data/spec/fluq/input/socket_spec.rb +34 -26
  51. data/spec/fluq/mixins/loggable_spec.rb +2 -2
  52. data/spec/fluq/runner_spec.rb +18 -0
  53. data/spec/fluq/worker_spec.rb +87 -0
  54. data/spec/fluq_spec.rb +1 -2
  55. data/spec/scenario/config/nested/feed1.rb +6 -0
  56. data/spec/scenario/config/test.rb +8 -2
  57. data/spec/spec_helper.rb +7 -26
  58. metadata +62 -62
  59. data/benchmark/logging.rb +0 -37
  60. data/examples/common.rb +0 -3
  61. data/examples/simple.rb +0 -5
  62. data/lib/fluq/buffer.rb +0 -6
  63. data/lib/fluq/buffer/base.rb +0 -51
  64. data/lib/fluq/buffer/file.rb +0 -68
  65. data/lib/fluq/feed/base.rb +0 -37
  66. data/lib/fluq/feed/json.rb +0 -28
  67. data/lib/fluq/feed/msgpack.rb +0 -27
  68. data/lib/fluq/feed/tsv.rb +0 -30
  69. data/lib/fluq/handler/null.rb +0 -4
  70. data/lib/fluq/input/socket/connection.rb +0 -41
  71. data/lib/fluq/mixins/logger.rb +0 -26
  72. data/lib/fluq/reactor.rb +0 -79
  73. data/spec/fluq/buffer/base_spec.rb +0 -21
  74. data/spec/fluq/buffer/file_spec.rb +0 -47
  75. data/spec/fluq/dsl_spec.rb +0 -43
  76. data/spec/fluq/feed/base_spec.rb +0 -15
  77. data/spec/fluq/feed/json_spec.rb +0 -27
  78. data/spec/fluq/feed/msgpack_spec.rb +0 -27
  79. data/spec/fluq/feed/tsv_spec.rb +0 -27
  80. data/spec/fluq/input/socket/connection_spec.rb +0 -35
  81. data/spec/fluq/mixins/logger_spec.rb +0 -25
  82. data/spec/fluq/reactor_spec.rb +0 -69
  83. data/spec/scenario/config/nested/common.rb +0 -3
@@ -0,0 +1,11 @@
1
+ class FluQ::DSL::Base
2
+
3
+ protected
4
+
5
+ def constantize(*path)
6
+ require([:fluq, *path].join('/'))
7
+ names = path.map {|p| p.to_s.split('_').map(&:capitalize).join }
8
+ names.inject(FluQ) {|klass, name| klass.const_get(name) }
9
+ end
10
+
11
+ end
@@ -0,0 +1,24 @@
1
+ # Feed-level DSL configuration
2
+ class FluQ::DSL::Feed < FluQ::DSL::Base
3
+ attr_reader :name, :inputs, :handlers
4
+
5
+ def initialize(name, &block)
6
+ @name = name
7
+ @inputs = []
8
+ @handlers = []
9
+ instance_eval(&block)
10
+ end
11
+
12
+ # @param [Array<Symbol>] input type path, e.g. :socket
13
+ def input(*type, &block)
14
+ klass = constantize(:input, *type)
15
+ inputs.push [klass, FluQ::DSL::Options.new(&block).to_hash]
16
+ end
17
+
18
+ # @param [Array<Symbol>] handler type path, e.g. :log, :counter
19
+ def handler(*type, &block)
20
+ klass = constantize(:handler, *type)
21
+ handlers.push [klass, FluQ::DSL::Options.new(&block).to_hash]
22
+ end
23
+
24
+ end
@@ -0,0 +1,35 @@
1
+ # Root-level DSL configuration
2
+ class FluQ::DSL::Root < FluQ::DSL::Base
3
+ attr_reader :path, :feeds
4
+
5
+ # @param [String] DSL script file path
6
+ def initialize(path)
7
+ @path = Pathname.new(path)
8
+ @feeds = []
9
+
10
+ instance_eval @path.read
11
+ end
12
+
13
+ # @param [String] feed name, e.g. "my_events"
14
+ def feed(name, &block)
15
+ feeds.push FluQ::DSL::Feed.new(name, &block)
16
+ end
17
+
18
+ # @param [String] relative relative path
19
+ def import(relative)
20
+ instance_eval path.dirname.join(relative).read
21
+ end
22
+
23
+ # Applies the configuration.
24
+ # Registers components of each feed. Handlers first, then inputs.
25
+ # @param [FluQ::Runner] runner
26
+ def apply(runner)
27
+ feeds.each do |conf|
28
+ runner.feed conf.name do |feed|
29
+ conf.handlers.each {|k, *a| feed.register(k, *a) }
30
+ conf.inputs.each {|k, *a| feed.listen(k, *a) }
31
+ end
32
+ end
33
+ end
34
+
35
+ end
data/lib/fluq/event.rb CHANGED
@@ -1,12 +1,13 @@
1
1
  class FluQ::Event < Hash
2
2
 
3
- attr_reader :tag, :timestamp
3
+ attr_accessor :timestamp
4
+ attr_reader :meta
4
5
 
5
- # @param [String] tag the event tag
6
- # @param [Integer] timestamp the UNIX timestamp
7
6
  # @param [Hash] record the attribute pairs
8
- def initialize(tag = "", timestamp = 0, record = {})
9
- @tag, @timestamp = tag.to_s, timestamp.to_i
7
+ # @param [Integer] timestamp the UNIX timestamp
8
+ def initialize(record = {}, timestamp = Time.now)
9
+ @timestamp = timestamp.to_i
10
+ @meta = {}
10
11
  super()
11
12
  update(record) if Hash === record
12
13
  end
@@ -16,40 +17,20 @@ class FluQ::Event < Hash
16
17
  @time ||= Time.at(timestamp).utc
17
18
  end
18
19
 
19
- # @return [Array] tuple
20
- def to_a
21
- [tag, timestamp, self]
22
- end
23
-
24
20
  # @return [Boolean] true if comparable
25
21
  def ==(other)
26
22
  case other
27
- when Array
28
- to_a == other
23
+ when FluQ::Event
24
+ super && other.timestamp == timestamp
29
25
  else
30
26
  super
31
27
  end
32
28
  end
33
29
  alias :eql? :==
34
30
 
35
- # @return [String] tab-separated string
36
- def to_tsv
37
- [tag, timestamp, Oj.dump(self)].join("\t")
38
- end
39
-
40
- # @return [String] JSON encoded
41
- def to_json
42
- Oj.dump merge("=" => tag, "@" => timestamp)
43
- end
44
-
45
- # @return [String] mgspack encoded bytes
46
- def to_msgpack
47
- MessagePack.pack merge("=" => tag, "@" => timestamp)
48
- end
49
-
50
31
  # @return [String] inspection
51
32
  def inspect
52
- [tag, timestamp, Hash.new.update(self)].inspect
33
+ "#<FluQ::Event(#{timestamp}) data:#{super} meta:#{meta.inspect}>"
53
34
  end
54
35
 
55
36
  end
data/lib/fluq/feed.rb CHANGED
@@ -1,6 +1,41 @@
1
- module FluQ::Feed
2
- end
1
+ class FluQ::Feed < Celluloid::SupervisionGroup
2
+
3
+ # @attr_reader [String] name
4
+ attr_reader :name
5
+
6
+ # @attr_reader [Array] handlers
7
+ attr_reader :handlers
8
+
9
+ # Constructor
10
+ # @param [String] name feed name
11
+ def initialize(name, &block)
12
+ @name = name.to_s
13
+ @handlers = []
14
+ super(&block)
15
+ end
16
+
17
+ # @return [Array<FluQ::Input::Base>] inputs
18
+ def inputs
19
+ actors
20
+ end
3
21
 
4
- %w'base msgpack json tsv'.each do |name|
5
- require "fluq/feed/#{name}"
6
- end
22
+ # Listens to an input
23
+ # @param [Class<FluQ::Input::Base>] klass input class
24
+ # @param [multiple] args initialization arguments
25
+ def listen(klass, *args)
26
+ supervise klass, name, handlers, *args
27
+ end
28
+
29
+ # Registers a handler
30
+ # @param [Class<FluQ::Handler::Base>] klass handler class
31
+ # @param [multiple] args initialization arguments
32
+ def register(klass, *args)
33
+ handlers.push [klass, *args]
34
+ end
35
+
36
+ # @return [String] introspection
37
+ def inspect
38
+ "#<#{self.class.name}(#{name}) inputs: #{inputs.size}, handlers: #{handlers.size}>"
39
+ end
40
+
41
+ end
@@ -0,0 +1,6 @@
1
+ module FluQ::Format
2
+ end
3
+
4
+ %w'base lines msgpack json tsv'.each do |name|
5
+ require "fluq/format/#{name}"
6
+ end
@@ -0,0 +1,42 @@
1
+ class FluQ::Format::Base
2
+ include FluQ::Mixins::Loggable
3
+ extend FluQ::Mixins::Loggable
4
+
5
+ # @abstract converter
6
+ # @param [String] raw event string
7
+ # @return [FluQ::Event] event
8
+ def self.to_event(raw)
9
+ end
10
+
11
+ # @abstract initializer
12
+ # @param [Hash] options format-specific options
13
+ def initialize(options = {})
14
+ @options = options
15
+ end
16
+
17
+ # @abstract parse data, return events
18
+ # @param [String] data
19
+ # @return [Array<FluQ::Event>] events
20
+ def parse(data)
21
+ events = []
22
+ parse_each(data) do |raw|
23
+ if event = self.class.to_event(raw)
24
+ events.push(event)
25
+ true
26
+ else
27
+ false
28
+ end
29
+ end
30
+ events
31
+ end
32
+
33
+ protected
34
+
35
+ # @abstract enumerator
36
+ # @param [String] data
37
+ # @yield over raw events
38
+ # @yieldparam [Hash] raw event data
39
+ def parse_each(data)
40
+ end
41
+
42
+ end
@@ -0,0 +1,17 @@
1
+ class FluQ::Format::Json < FluQ::Format::Lines
2
+
3
+ # @see FluQ::Format::Base.to_event
4
+ def self.to_event(raw)
5
+ case hash = MultiJson.load(raw)
6
+ when Hash
7
+ FluQ::Event.new(hash)
8
+ else
9
+ logger.warn "buffer contained invalid event #{hash.inspect}"
10
+ nil
11
+ end
12
+ rescue MultiJson::LoadError
13
+ logger.warn "buffer contained invalid line #{raw.inspect}"
14
+ nil
15
+ end
16
+
17
+ end
@@ -0,0 +1,27 @@
1
+ class FluQ::Format::Lines < FluQ::Format::Base
2
+ include MonitorMixin
3
+
4
+ # @see FluQ::Format::Base#initialize
5
+ def initialize(*)
6
+ super
7
+ @buffer = ""
8
+ end
9
+
10
+ protected
11
+
12
+ # @see FluQ::Format::Base#parse_each
13
+ def parse_each(chunk)
14
+ last_chunk = nil
15
+ synchronize do
16
+ @buffer << chunk
17
+ @buffer.each_line do |line|
18
+ line.chomp!
19
+ next if line.empty?
20
+
21
+ last_chunk = yield(line) ? nil : line
22
+ end
23
+ last_chunk ? @buffer = last_chunk : @buffer.clear
24
+ end
25
+ end
26
+
27
+ end
@@ -0,0 +1,28 @@
1
+ class FluQ::Format::Msgpack < FluQ::Format::Base
2
+
3
+ # @see FluQ::Format::Base.to_event
4
+ def self.to_event(raw)
5
+ case raw
6
+ when Hash
7
+ FluQ::Event.new(raw)
8
+ else
9
+ logger.warn "buffer contained invalid event #{raw.inspect}"
10
+ nil
11
+ end
12
+ end
13
+
14
+ # Msgpack initializer
15
+ # @see FluQ::Format::Base#initialize
16
+ def initialize(*)
17
+ super
18
+ @buffer = MessagePack::Unpacker.new
19
+ end
20
+
21
+ protected
22
+
23
+ # @see FluQ::Format::Base#parse_each
24
+ def parse_each(chunk, &block)
25
+ @buffer.feed_each(chunk, &block)
26
+ end
27
+
28
+ end if defined?(MessagePack)
@@ -0,0 +1,19 @@
1
+ class FluQ::Format::Tsv < FluQ::Format::Lines
2
+
3
+ # @see FluQ::Format::Base.to_event
4
+ def self.to_event(raw)
5
+ timestamp, json = raw.split("\t")
6
+
7
+ case hash = MultiJson.load(json)
8
+ when Hash
9
+ FluQ::Event.new hash, timestamp
10
+ else
11
+ logger.warn "buffer contained invalid event #{hash.inspect}"
12
+ nil
13
+ end
14
+ rescue MultiJson::LoadError, ArgumentError
15
+ logger.warn "buffer contained invalid line #{raw.inspect}"
16
+ nil
17
+ end
18
+
19
+ end
data/lib/fluq/handler.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module FluQ::Handler
2
2
  end
3
3
 
4
- %w'base log null'.each do |name|
4
+ %w'base log noop'.each do |name|
5
5
  require "fluq/handler/#{name}"
6
6
  end
@@ -8,43 +8,34 @@ class FluQ::Handler::Base
8
8
  @type ||= name.split("::")[-1].downcase
9
9
  end
10
10
 
11
- # @attr_reader [FluQ::Reactor] reactor
12
- attr_reader :reactor
13
-
14
11
  # @attr_reader [String] name unique name
15
12
  attr_reader :name
16
13
 
17
14
  # @attr_reader [Hash] config
18
15
  attr_reader :config
19
16
 
20
- # @attr_reader [Regexp] pattern
21
- attr_reader :pattern
22
-
23
17
  # @param [Hash] options
24
18
  # @option options [String] :name a (unique) handler identifier
25
- # @option options [String] :pattern tag pattern to match
26
19
  # @example
27
20
  #
28
21
  # class MyHandler < FluQ::Handler::Base
29
22
  # end
30
- # MyHandler.new(reactor, pattern: "visits.*")
23
+ # MyHandler.new
31
24
  #
32
- def initialize(reactor, options = {})
33
- @reactor = reactor
25
+ def initialize(options = {})
34
26
  @config = defaults.merge(options)
35
- @name = config[:name] || generate_name
36
- @pattern = generate_pattern
27
+ @name = config[:name] || self.class.type
37
28
  end
38
29
 
39
- # @return [Boolean] true if event matches
40
- def match?(event)
41
- !!(pattern =~ event.tag)
30
+ # @param [Array<FluQ::Event>] events
31
+ # @return [Array<FluQ::Event>] filtered events
32
+ def filter(events)
33
+ events
42
34
  end
43
35
 
44
- # @param [Array<FluQ::Event>] events
45
- # @return [Array<FluQ::Event>] matching events
46
- def select(events)
47
- events.select {|e| match?(e) }
36
+ # @return [Timers] timers
37
+ def timers
38
+ @timers ||= Timers.new
48
39
  end
49
40
 
50
41
  # @abstract callback, called on each event
@@ -56,25 +47,7 @@ class FluQ::Handler::Base
56
47
 
57
48
  # Configuration defaults
58
49
  def defaults
59
- { pattern: /./, timeout: 60 }
60
- end
61
-
62
- # @return [String] generated name
63
- def generate_name
64
- suffix = [Digest::MD5.digest(config[:pattern].to_s)].pack("m0").tr('+/=lIO0', 'pqrsxyz')[0,6]
65
- [self.class.type, suffix].join("-")
66
- end
67
-
68
- def generate_pattern
69
- return config[:pattern] if Regexp === config[:pattern]
70
-
71
- string = Regexp.quote(config[:pattern])
72
- string.gsub!("\\*", ".*")
73
- string.gsub!("\\?", ".")
74
- string.gsub!(/\\\{(.+?)\\\}/) do |match|
75
- "(?:#{$1.split(",").join("|")})"
76
- end
77
- Regexp.new "^#{string}$"
50
+ { timeout: 60 }
78
51
  end
79
52
 
80
53
  end
@@ -20,10 +20,10 @@ class FluQ::Handler::Log < FluQ::Handler::Base
20
20
  # @see FluQ::Handler::Base#initialize
21
21
  def initialize(*)
22
22
  super
23
- @full_path = FluQ.root.join(config[:path]).to_s.freeze
24
- @rewrite = config[:rewrite]
25
- @convert = config[:convert]
26
- @pool = FilePool.new max_size: config[:cache_max], ttl: config[:cache_ttl]
23
+ @path = config[:path]
24
+ @rewrite = config[:rewrite]
25
+ @convert = config[:convert]
26
+ @pool = FilePool.new max_size: config[:cache_max], ttl: config[:cache_ttl]
27
27
  end
28
28
 
29
29
  # @see FluQ::Handler::Base#on_events
@@ -36,9 +36,8 @@ class FluQ::Handler::Log < FluQ::Handler::Base
36
36
  # Configuration defaults
37
37
  def defaults
38
38
  super.merge \
39
- path: "log/raw/%t/%Y%m%d/%H.log",
40
- rewrite: lambda {|tag| tag.gsub(".", "/") },
41
- convert: lambda {|event| event.to_tsv },
39
+ path: "log/raw/%Y%m%d.log",
40
+ convert: ->evt { [evt.timestamp, MultiJson.dump(evt)].join("\t") },
42
41
  cache_max: 100,
43
42
  cache_ttl: 300
44
43
  end
@@ -46,7 +45,7 @@ class FluQ::Handler::Log < FluQ::Handler::Base
46
45
  def write(path, slice, attepts = 0)
47
46
  io = @pool.open(path)
48
47
  slice.each do |event|
49
- io.write "#{@convert.call(event)}\n"
48
+ io.write @convert.call(event) << "\n"
50
49
  end
51
50
  rescue IOError
52
51
  @pool.delete path.to_s
@@ -54,14 +53,13 @@ class FluQ::Handler::Log < FluQ::Handler::Base
54
53
  end
55
54
 
56
55
  def partition(events)
57
- paths = {}
56
+ paths = Hash.new {|h,k| h[k] = [] }
58
57
  events.each do |event|
59
- tag = @rewrite.call(event.tag)
60
- path = event.time.strftime(@full_path.gsub("%t", tag))
61
- paths[path] ||= []
62
- paths[path] << event
58
+ tag = @rewrite ? @rewrite.call(event).to_s : ""
59
+ path = event.time.strftime(FluQ.root.join(@path).to_s.gsub("%t", tag))
60
+ paths[path] << event
63
61
  end
64
62
  paths
65
63
  end
66
64
 
67
- end
65
+ end