fluq 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.travis.yml +3 -0
  4. data/Gemfile +12 -1
  5. data/Gemfile.lock +44 -8
  6. data/README.md +24 -6
  7. data/Rakefile +8 -1
  8. data/benchmark/socket.rb +13 -25
  9. data/examples/config/multi.rb +52 -0
  10. data/examples/config/simple.rb +15 -0
  11. data/fluq.gemspec +3 -3
  12. data/lib/fluq.rb +22 -16
  13. data/lib/fluq/cli.rb +3 -12
  14. data/lib/fluq/dsl.rb +2 -45
  15. data/lib/fluq/dsl/base.rb +11 -0
  16. data/lib/fluq/dsl/feed.rb +24 -0
  17. data/lib/fluq/dsl/root.rb +35 -0
  18. data/lib/fluq/event.rb +9 -28
  19. data/lib/fluq/feed.rb +40 -5
  20. data/lib/fluq/format.rb +6 -0
  21. data/lib/fluq/format/base.rb +42 -0
  22. data/lib/fluq/format/json.rb +17 -0
  23. data/lib/fluq/format/lines.rb +27 -0
  24. data/lib/fluq/format/msgpack.rb +28 -0
  25. data/lib/fluq/format/tsv.rb +19 -0
  26. data/lib/fluq/handler.rb +1 -1
  27. data/lib/fluq/handler/base.rb +11 -38
  28. data/lib/fluq/handler/log.rb +12 -14
  29. data/lib/fluq/handler/noop.rb +2 -0
  30. data/lib/fluq/input/base.rb +33 -29
  31. data/lib/fluq/input/socket.rb +46 -16
  32. data/lib/fluq/mixins.rb +2 -2
  33. data/lib/fluq/runner.rb +41 -0
  34. data/lib/fluq/testing.rb +5 -11
  35. data/lib/fluq/version.rb +1 -1
  36. data/lib/fluq/worker.rb +73 -0
  37. data/spec/fluq/dsl/feed_spec.rb +33 -0
  38. data/spec/fluq/dsl/root_spec.rb +20 -0
  39. data/spec/fluq/event_spec.rb +17 -12
  40. data/spec/fluq/feed_spec.rb +24 -0
  41. data/spec/fluq/format/base_spec.rb +9 -0
  42. data/spec/fluq/format/json_spec.rb +22 -0
  43. data/spec/fluq/format/lines_spec.rb +20 -0
  44. data/spec/fluq/format/msgpack_spec.rb +22 -0
  45. data/spec/fluq/format/tsv_spec.rb +21 -0
  46. data/spec/fluq/handler/base_spec.rb +7 -52
  47. data/spec/fluq/handler/log_spec.rb +11 -14
  48. data/spec/fluq/handler/{null_spec.rb → noop_spec.rb} +1 -3
  49. data/spec/fluq/input/base_spec.rb +48 -15
  50. data/spec/fluq/input/socket_spec.rb +34 -26
  51. data/spec/fluq/mixins/loggable_spec.rb +2 -2
  52. data/spec/fluq/runner_spec.rb +18 -0
  53. data/spec/fluq/worker_spec.rb +87 -0
  54. data/spec/fluq_spec.rb +1 -2
  55. data/spec/scenario/config/nested/feed1.rb +6 -0
  56. data/spec/scenario/config/test.rb +8 -2
  57. data/spec/spec_helper.rb +7 -26
  58. metadata +62 -62
  59. data/benchmark/logging.rb +0 -37
  60. data/examples/common.rb +0 -3
  61. data/examples/simple.rb +0 -5
  62. data/lib/fluq/buffer.rb +0 -6
  63. data/lib/fluq/buffer/base.rb +0 -51
  64. data/lib/fluq/buffer/file.rb +0 -68
  65. data/lib/fluq/feed/base.rb +0 -37
  66. data/lib/fluq/feed/json.rb +0 -28
  67. data/lib/fluq/feed/msgpack.rb +0 -27
  68. data/lib/fluq/feed/tsv.rb +0 -30
  69. data/lib/fluq/handler/null.rb +0 -4
  70. data/lib/fluq/input/socket/connection.rb +0 -41
  71. data/lib/fluq/mixins/logger.rb +0 -26
  72. data/lib/fluq/reactor.rb +0 -79
  73. data/spec/fluq/buffer/base_spec.rb +0 -21
  74. data/spec/fluq/buffer/file_spec.rb +0 -47
  75. data/spec/fluq/dsl_spec.rb +0 -43
  76. data/spec/fluq/feed/base_spec.rb +0 -15
  77. data/spec/fluq/feed/json_spec.rb +0 -27
  78. data/spec/fluq/feed/msgpack_spec.rb +0 -27
  79. data/spec/fluq/feed/tsv_spec.rb +0 -27
  80. data/spec/fluq/input/socket/connection_spec.rb +0 -35
  81. data/spec/fluq/mixins/logger_spec.rb +0 -25
  82. data/spec/fluq/reactor_spec.rb +0 -69
  83. data/spec/scenario/config/nested/common.rb +0 -3
data/lib/fluq/dsl/base.rb ADDED
@@ -0,0 +1,11 @@
+class FluQ::DSL::Base
+
+  protected
+
+  def constantize(*path)
+    require([:fluq, *path].join('/'))
+    names = path.map {|p| p.to_s.split('_').map(&:capitalize).join }
+    names.inject(FluQ) {|klass, name| klass.const_get(name) }
+  end
+
+end
data/lib/fluq/dsl/feed.rb ADDED
@@ -0,0 +1,24 @@
+# Feed-level DSL configuration
+class FluQ::DSL::Feed < FluQ::DSL::Base
+  attr_reader :name, :inputs, :handlers
+
+  def initialize(name, &block)
+    @name = name
+    @inputs = []
+    @handlers = []
+    instance_eval(&block)
+  end
+
+  # @param [Array<Symbol>] input type path, e.g. :socket
+  def input(*type, &block)
+    klass = constantize(:input, *type)
+    inputs.push [klass, FluQ::DSL::Options.new(&block).to_hash]
+  end
+
+  # @param [Array<Symbol>] handler type path, e.g. :log, :counter
+  def handler(*type, &block)
+    klass = constantize(:handler, *type)
+    handlers.push [klass, FluQ::DSL::Options.new(&block).to_hash]
+  end
+
+end
data/lib/fluq/dsl/root.rb ADDED
@@ -0,0 +1,35 @@
+# Root-level DSL configuration
+class FluQ::DSL::Root < FluQ::DSL::Base
+  attr_reader :path, :feeds
+
+  # @param [String] DSL script file path
+  def initialize(path)
+    @path = Pathname.new(path)
+    @feeds = []
+
+    instance_eval @path.read
+  end
+
+  # @param [String] feed name, e.g. "my_events"
+  def feed(name, &block)
+    feeds.push FluQ::DSL::Feed.new(name, &block)
+  end
+
+  # @param [String] relative relative path
+  def import(relative)
+    instance_eval path.dirname.join(relative).read
+  end
+
+  # Applies the configuration.
+  # Registers components of each feed. Handlers first, then inputs.
+  # @param [FluQ::Runner] runner
+  def apply(runner)
+    feeds.each do |conf|
+      runner.feed conf.name do |feed|
+        conf.handlers.each {|k, *a| feed.register(k, *a) }
+        conf.inputs.each {|k, *a| feed.listen(k, *a) }
+      end
+    end
+  end
+
+end
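
Taken together, the new DSL maps a config script onto runner feeds. A minimal sketch of such a script, loaded via FluQ::DSL::Root.new(path).apply(runner), might look like the following; the feed name and the option calls inside the blocks are assumptions, since FluQ::DSL::Options is not part of this diff:

    # fluq.conf.rb (hypothetical config script, e.g. like examples/config/simple.rb)
    feed :events do
      input :socket do                  # constantize(:input, :socket) => FluQ::Input::Socket
        bind "tcp://127.0.0.1:7654"     # option name is illustrative only
      end
      handler :log do                   # constantize(:handler, :log) => FluQ::Handler::Log
        path "log/raw/%Y%m%d.log"       # option name is illustrative only
      end
    end

    # Applying it:
    #   FluQ::DSL::Root.new("fluq.conf.rb").apply(runner)
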
data/lib/fluq/event.rb CHANGED
@@ -1,12 +1,13 @@
 class FluQ::Event < Hash
 
-  attr_reader :tag, :timestamp
+  attr_accessor :timestamp
+  attr_reader :meta
 
-  # @param [String] tag the event tag
-  # @param [Integer] timestamp the UNIX timestamp
   # @param [Hash] record the attribute pairs
-  def initialize(tag = "", timestamp = 0, record = {})
-    @tag, @timestamp = tag.to_s, timestamp.to_i
+  # @param [Integer] timestamp the UNIX timestamp
+  def initialize(record = {}, timestamp = Time.now)
+    @timestamp = timestamp.to_i
+    @meta = {}
     super()
     update(record) if Hash === record
   end
@@ -16,40 +17,20 @@ class FluQ::Event < Hash
     @time ||= Time.at(timestamp).utc
   end
 
-  # @return [Array] tuple
-  def to_a
-    [tag, timestamp, self]
-  end
-
   # @return [Boolean] true if comparable
   def ==(other)
     case other
-    when Array
-      to_a == other
+    when FluQ::Event
+      super && other.timestamp == timestamp
     else
       super
     end
   end
   alias :eql? :==
 
-  # @return [String] tab-separated string
-  def to_tsv
-    [tag, timestamp, Oj.dump(self)].join("\t")
-  end
-
-  # @return [String] JSON encoded
-  def to_json
-    Oj.dump merge("=" => tag, "@" => timestamp)
-  end
-
-  # @return [String] mgspack encoded bytes
-  def to_msgpack
-    MessagePack.pack merge("=" => tag, "@" => timestamp)
-  end
-
   # @return [String] inspection
   def inspect
-    [tag, timestamp, Hash.new.update(self)].inspect
+    "#<FluQ::Event(#{timestamp}) data:#{super} meta:#{meta.inspect}>"
  end
 
 end
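
Events thus drop the tag and gain per-event meta, and the to_tsv/to_json/to_msgpack helpers are gone (parsing now lives in the new FluQ::Format classes). A minimal before/after sketch of the call sites, with illustrative record fields:

    # 0.7.x - tag and timestamp were positional:
    #   FluQ::Event.new("visits.web", Time.now.to_i, "path" => "/home")

    # 0.8.0 - an event is a Hash of attributes plus a timestamp (defaults to Time.now):
    event = FluQ::Event.new({ "path" => "/home" }, Time.now)
    event.timestamp   # => Integer UNIX timestamp
    event.time        # => Time (UTC), memoized from the timestamp
    event.meta        # => {} mutable per-event metadata, new in 0.8.0
    event == FluQ::Event.new({ "path" => "/home" }, event.timestamp)   # => true
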
data/lib/fluq/feed.rb CHANGED
@@ -1,6 +1,41 @@
-module FluQ::Feed
-end
+class FluQ::Feed < Celluloid::SupervisionGroup
+
+  # @attr_reader [String] name
+  attr_reader :name
+
+  # @attr_reader [Array] handlers
+  attr_reader :handlers
+
+  # Constructor
+  # @param [String] name feed name
+  def initialize(name, &block)
+    @name = name.to_s
+    @handlers = []
+    super(&block)
+  end
+
+  # @return [Array<FluQ::Input::Base>] inputs
+  def inputs
+    actors
+  end
 
-%w'base msgpack json tsv'.each do |name|
-  require "fluq/feed/#{name}"
-end
+  # Listens to an input
+  # @param [Class<FluQ::Input::Base>] klass input class
+  # @param [multiple] args initialization arguments
+  def listen(klass, *args)
+    supervise klass, name, handlers, *args
+  end
+
+  # Registers a handler
+  # @param [Class<FluQ::Handler::Base>] klass handler class
+  # @param [multiple] args initialization arguments
+  def register(klass, *args)
+    handlers.push [klass, *args]
+  end
+
+  # @return [String] introspection
+  def inspect
+    "#<#{self.class.name}(#{name}) inputs: #{inputs.size}, handlers: #{handlers.size}>"
+  end
+
+end
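
FluQ::Feed is now a Celluloid supervision group: inputs are supervised actors, handlers are kept as plain [klass, *args] tuples. Wired up directly, without the DSL, this looks roughly like the sketch below; FluQ::Runner ships in this release but its internals are not shown in this diff, and the option hashes are illustrative:

    runner = FluQ::Runner.new   # assumption: argument-less constructor (data/lib/fluq/runner.rb not shown)
    runner.feed :events do |feed|
      # handlers are registered first ...
      feed.register FluQ::Handler::Log, path: "log/raw/%Y%m%d.log"
      # ... then inputs are supervised with (name, handlers, *args)
      feed.listen FluQ::Input::Socket, bind: "tcp://0.0.0.0:7654"
    end
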
data/lib/fluq/format.rb ADDED
@@ -0,0 +1,6 @@
+module FluQ::Format
+end
+
+%w'base lines msgpack json tsv'.each do |name|
+  require "fluq/format/#{name}"
+end
data/lib/fluq/format/base.rb ADDED
@@ -0,0 +1,42 @@
+class FluQ::Format::Base
+  include FluQ::Mixins::Loggable
+  extend FluQ::Mixins::Loggable
+
+  # @abstract converter
+  # @param [String] raw event string
+  # @return [FluQ::Event] event
+  def self.to_event(raw)
+  end
+
+  # @abstract initializer
+  # @param [Hash] options format-specific options
+  def initialize(options = {})
+    @options = options
+  end
+
+  # @abstract parse data, return events
+  # @param [String] data
+  # @return [Array<FluQ::Event>] events
+  def parse(data)
+    events = []
+    parse_each(data) do |raw|
+      if event = self.class.to_event(raw)
+        events.push(event)
+        true
+      else
+        false
+      end
+    end
+    events
+  end
+
+  protected
+
+  # @abstract enumerator
+  # @param [String] data
+  # @yield over raw events
+  # @yieldparam [Hash] raw event data
+  def parse_each(data)
+  end
+
+end
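
The base class splits parsing into a class-level to_event converter and a protected parse_each enumerator, so a new format only needs to supply those two pieces. A hypothetical CSV-style format, reusing the line buffering from FluQ::Format::Lines (shown later in this diff) much like the Tsv format does; the "timestamp,json" layout is made up for illustration:

    class FluQ::Format::Csv < FluQ::Format::Lines   # hypothetical example format

      # one "timestamp,json" record per line
      def self.to_event(raw)
        timestamp, json = raw.split(",", 2)
        case hash = MultiJson.load(json)
        when Hash then FluQ::Event.new(hash, Integer(timestamp))
        else nil
        end
      rescue MultiJson::LoadError, ArgumentError, TypeError
        logger.warn "buffer contained invalid line #{raw.inspect}"
        nil
      end

    end
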
data/lib/fluq/format/json.rb ADDED
@@ -0,0 +1,17 @@
+class FluQ::Format::Json < FluQ::Format::Lines
+
+  # @see FluQ::Format::Base.to_event
+  def self.to_event(raw)
+    case hash = MultiJson.load(raw)
+    when Hash
+      FluQ::Event.new(hash)
+    else
+      logger.warn "buffer contained invalid event #{hash.inspect}"
+      nil
+    end
+  rescue MultiJson::LoadError
+    logger.warn "buffer contained invalid line #{raw.inspect}"
+    nil
+  end
+
+end
data/lib/fluq/format/lines.rb ADDED
@@ -0,0 +1,27 @@
+class FluQ::Format::Lines < FluQ::Format::Base
+  include MonitorMixin
+
+  # @see FluQ::Format::Base#initialize
+  def initialize(*)
+    super
+    @buffer = ""
+  end
+
+  protected
+
+  # @see FluQ::Format::Base#parse_each
+  def parse_each(chunk)
+    last_chunk = nil
+    synchronize do
+      @buffer << chunk
+      @buffer.each_line do |line|
+        line.chomp!
+        next if line.empty?
+
+        last_chunk = yield(line) ? nil : line
+      end
+      last_chunk ? @buffer = last_chunk : @buffer.clear
+    end
+  end
+
+end
data/lib/fluq/format/msgpack.rb ADDED
@@ -0,0 +1,28 @@
+class FluQ::Format::Msgpack < FluQ::Format::Base
+
+  # @see FluQ::Format::Base.to_event
+  def self.to_event(raw)
+    case raw
+    when Hash
+      FluQ::Event.new(raw)
+    else
+      logger.warn "buffer contained invalid event #{raw.inspect}"
+      nil
+    end
+  end
+
+  # Msgpack initializer
+  # @see FluQ::Format::Base#initialize
+  def initialize(*)
+    super
+    @buffer = MessagePack::Unpacker.new
+  end
+
+  protected
+
+  # @see FluQ::Format::Base#parse_each
+  def parse_each(chunk, &block)
+    @buffer.feed_each(chunk, &block)
+  end
+
+end if defined?(MessagePack)
data/lib/fluq/format/tsv.rb ADDED
@@ -0,0 +1,19 @@
+class FluQ::Format::Tsv < FluQ::Format::Lines
+
+  # @see FluQ::Format::Base.to_event
+  def self.to_event(raw)
+    timestamp, json = raw.split("\t")
+
+    case hash = MultiJson.load(json)
+    when Hash
+      FluQ::Event.new hash, timestamp
+    else
+      logger.warn "buffer contained invalid event #{hash.inspect}"
+      nil
+    end
+  rescue MultiJson::LoadError, ArgumentError
+    logger.warn "buffer contained invalid line #{raw.inspect}"
+    nil
+  end
+
+end
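
Based on the parse/parse_each contract above, feeding a chunk of newline-delimited JSON through a format instance yields events and holds back anything it could not decode; the payload below is illustrative:

    format = FluQ::Format::Json.new
    events = format.parse(%({"path":"/home"}\n{"path":"/about"}\noops\n))
    events.size   # => 2
    # the undecodable trailing "oops" line is logged as a warning and kept in the
    # line buffer as a potentially incomplete record, to be completed (or rejected
    # again) when the next chunk arrives
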
data/lib/fluq/handler.rb CHANGED
@@ -1,6 +1,6 @@
 module FluQ::Handler
 end
 
-%w'base log null'.each do |name|
+%w'base log noop'.each do |name|
   require "fluq/handler/#{name}"
 end
data/lib/fluq/handler/base.rb CHANGED
@@ -8,43 +8,34 @@ class FluQ::Handler::Base
     @type ||= name.split("::")[-1].downcase
   end
 
-  # @attr_reader [FluQ::Reactor] reactor
-  attr_reader :reactor
-
   # @attr_reader [String] name unique name
   attr_reader :name
 
   # @attr_reader [Hash] config
   attr_reader :config
 
-  # @attr_reader [Regexp] pattern
-  attr_reader :pattern
-
   # @param [Hash] options
   # @option options [String] :name a (unique) handler identifier
-  # @option options [String] :pattern tag pattern to match
   # @example
   #
   #   class MyHandler < FluQ::Handler::Base
   #   end
-  #   MyHandler.new(reactor, pattern: "visits.*")
+  #   MyHandler.new
   #
-  def initialize(reactor, options = {})
-    @reactor = reactor
+  def initialize(options = {})
     @config = defaults.merge(options)
-    @name = config[:name] || generate_name
-    @pattern = generate_pattern
+    @name = config[:name] || self.class.type
   end
 
-  # @return [Boolean] true if event matches
-  def match?(event)
-    !!(pattern =~ event.tag)
+  # @param [Array<FluQ::Event>] events
+  # @return [Array<FluQ::Event>] filtered events
+  def filter(events)
+    events
   end
 
-  # @param [Array<FluQ::Event>] events
-  # @return [Array<FluQ::Event>] matching events
-  def select(events)
-    events.select {|e| match?(e) }
+  # @return [Timers] timers
+  def timers
+    @timers ||= Timers.new
   end
 
   # @abstract callback, called on each event
@@ -56,25 +47,7 @@ class FluQ::Handler::Base
 
   # Configuration defaults
   def defaults
-    { pattern: /./, timeout: 60 }
-  end
-
-  # @return [String] generated name
-  def generate_name
-    suffix = [Digest::MD5.digest(config[:pattern].to_s)].pack("m0").tr('+/=lIO0', 'pqrsxyz')[0,6]
-    [self.class.type, suffix].join("-")
-  end
-
-  def generate_pattern
-    return config[:pattern] if Regexp === config[:pattern]
-
-    string = Regexp.quote(config[:pattern])
-    string.gsub!("\*", ".*")
-    string.gsub!("\?", ".")
-    string.gsub!(/\\\{(.+?)\\\}/) do |match|
-      "(?:#{$1.split(",").join("|")})"
-    end
-    Regexp.new "^#{string}$"
+    { timeout: 60 }
   end
 
 end
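
Handlers no longer receive a reactor or a tag pattern; subclasses override filter and the on_events callback instead. A minimal sketch of a custom handler under the new API, where the class name and behaviour are made up and the on_events signature is assumed from the reference in the log handler:

    class CounterHandler < FluQ::Handler::Base

      # keep only events that carry a "path" attribute
      def filter(events)
        events.select {|e| e.key?("path") }
      end

      # assumed signature: called with a batch of (filtered) events
      def on_events(events)
        @count = (@count || 0) + events.size
      end

    end

    CounterHandler.new(name: "counter")   # no reactor argument any more
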
data/lib/fluq/handler/log.rb CHANGED
@@ -20,10 +20,10 @@ class FluQ::Handler::Log < FluQ::Handler::Base
   # @see FluQ::Handler::Base#initialize
   def initialize(*)
     super
-    @full_path = FluQ.root.join(config[:path]).to_s.freeze
-    @rewrite   = config[:rewrite]
-    @convert   = config[:convert]
-    @pool      = FilePool.new max_size: config[:cache_max], ttl: config[:cache_ttl]
+    @path    = config[:path]
+    @rewrite = config[:rewrite]
+    @convert = config[:convert]
+    @pool    = FilePool.new max_size: config[:cache_max], ttl: config[:cache_ttl]
   end
 
   # @see FluQ::Handler::Base#on_events
@@ -36,9 +36,8 @@ class FluQ::Handler::Log < FluQ::Handler::Base
   # Configuration defaults
   def defaults
     super.merge \
-      path: "log/raw/%t/%Y%m%d/%H.log",
-      rewrite: lambda {|tag| tag.gsub(".", "/") },
-      convert: lambda {|event| event.to_tsv },
+      path: "log/raw/%Y%m%d.log",
+      convert: ->evt { [evt.timestamp, MultiJson.dump(evt)].join("\t") },
       cache_max: 100,
       cache_ttl: 300
   end
@@ -46,7 +45,7 @@ class FluQ::Handler::Log < FluQ::Handler::Base
   def write(path, slice, attepts = 0)
     io = @pool.open(path)
     slice.each do |event|
-      io.write "#{@convert.call(event)}\n"
+      io.write @convert.call(event) << "\n"
     end
   rescue IOError
     @pool.delete path.to_s
@@ -54,14 +53,13 @@
   end
 
   def partition(events)
-    paths = {}
+    paths = Hash.new {|h,k| h[k] = [] }
     events.each do |event|
-      tag  = @rewrite.call(event.tag)
-      path = event.time.strftime(@full_path.gsub("%t", tag))
-      paths[path] ||= []
-      paths[path] << event
+      tag  = @rewrite ? @rewrite.call(event).to_s : ""
+      path = event.time.strftime(FluQ.root.join(@path).to_s.gsub("%t", tag))
+      paths[path] << event
    end
     paths
   end
 
-end
+end
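
The log handler's %t placeholder still works, but the rewrite proc now receives the whole event rather than a tag, and rewrite is optional. A sketch of registering it with per-channel files; the "channel" attribute is illustrative:

    feed.register FluQ::Handler::Log,
      path:    "log/raw/%t/%Y%m%d.log",             # %t is substituted with the rewrite result
      rewrite: ->(event) { event["channel"] },      # receives the event, not a tag
      convert: ->(event) { MultiJson.dump(event) }  # one line per event; "\n" is appended by #write
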