fluq 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. data/.gitignore +3 -0
  2. data/.travis.yml +6 -0
  3. data/Gemfile +6 -0
  4. data/Gemfile.lock +39 -0
  5. data/MIT-LICENCE +19 -0
  6. data/README.md +10 -0
  7. data/Rakefile +11 -0
  8. data/benchmark/logging.rb +37 -0
  9. data/benchmark/socket.rb +52 -0
  10. data/bin/fluq-rb +8 -0
  11. data/examples/common.rb +3 -0
  12. data/examples/simple.rb +5 -0
  13. data/fluq.gemspec +33 -0
  14. data/lib/fluq.rb +50 -0
  15. data/lib/fluq/buffer.rb +6 -0
  16. data/lib/fluq/buffer/base.rb +51 -0
  17. data/lib/fluq/buffer/file.rb +68 -0
  18. data/lib/fluq/cli.rb +142 -0
  19. data/lib/fluq/dsl.rb +49 -0
  20. data/lib/fluq/dsl/options.rb +27 -0
  21. data/lib/fluq/error.rb +2 -0
  22. data/lib/fluq/event.rb +55 -0
  23. data/lib/fluq/feed.rb +6 -0
  24. data/lib/fluq/feed/base.rb +18 -0
  25. data/lib/fluq/feed/json.rb +28 -0
  26. data/lib/fluq/feed/msgpack.rb +27 -0
  27. data/lib/fluq/feed/tsv.rb +30 -0
  28. data/lib/fluq/handler.rb +6 -0
  29. data/lib/fluq/handler/base.rb +80 -0
  30. data/lib/fluq/handler/log.rb +67 -0
  31. data/lib/fluq/handler/null.rb +4 -0
  32. data/lib/fluq/input.rb +6 -0
  33. data/lib/fluq/input/base.rb +59 -0
  34. data/lib/fluq/input/socket.rb +50 -0
  35. data/lib/fluq/input/socket/connection.rb +41 -0
  36. data/lib/fluq/mixins.rb +6 -0
  37. data/lib/fluq/mixins/loggable.rb +7 -0
  38. data/lib/fluq/mixins/logger.rb +26 -0
  39. data/lib/fluq/reactor.rb +76 -0
  40. data/lib/fluq/testing.rb +26 -0
  41. data/lib/fluq/url.rb +16 -0
  42. data/lib/fluq/version.rb +3 -0
  43. data/spec/fluq/buffer/base_spec.rb +21 -0
  44. data/spec/fluq/buffer/file_spec.rb +47 -0
  45. data/spec/fluq/dsl/options_spec.rb +24 -0
  46. data/spec/fluq/dsl_spec.rb +43 -0
  47. data/spec/fluq/event_spec.rb +25 -0
  48. data/spec/fluq/feed/base_spec.rb +15 -0
  49. data/spec/fluq/feed/json_spec.rb +27 -0
  50. data/spec/fluq/feed/msgpack_spec.rb +27 -0
  51. data/spec/fluq/feed/tsv_spec.rb +27 -0
  52. data/spec/fluq/handler/base_spec.rb +70 -0
  53. data/spec/fluq/handler/log_spec.rb +68 -0
  54. data/spec/fluq/handler/null_spec.rb +11 -0
  55. data/spec/fluq/input/base_spec.rb +29 -0
  56. data/spec/fluq/input/socket/connection_spec.rb +35 -0
  57. data/spec/fluq/input/socket_spec.rb +45 -0
  58. data/spec/fluq/mixins/loggable_spec.rb +10 -0
  59. data/spec/fluq/mixins/logger_spec.rb +25 -0
  60. data/spec/fluq/reactor_spec.rb +58 -0
  61. data/spec/fluq/url_spec.rb +16 -0
  62. data/spec/fluq_spec.rb +11 -0
  63. data/spec/scenario/config/nested/common.rb +3 -0
  64. data/spec/scenario/config/test.rb +3 -0
  65. data/spec/scenario/lib/fluq/handler/custom/test_handler.rb +4 -0
  66. data/spec/spec_helper.rb +12 -0
  67. data/spec/support/configuration.rb +25 -0
  68. metadata +242 -0
data/lib/fluq/dsl.rb ADDED
@@ -0,0 +1,49 @@
1
# Evaluates a FluQ configuration script and registers the handlers and
# inputs it declares with a reactor.
class FluQ::DSL
  attr_reader :path, :reactor, :inputs, :handlers

  # @param [FluQ::Reactor] reactor
  # @param [String] path DSL script file path
  def initialize(reactor, path)
    @reactor  = reactor
    @path     = Pathname.new(path)
    @inputs   = []
    @handlers = []
  end

  # Declares an input. The [class, options] pair is only collected here;
  # it is handed to the reactor by #run.
  # @param [Array<Symbol>] type input type path, e.g. :socket
  # @yield optional options block, evaluated by FluQ::DSL::Options
  def input(*type, &block)
    klass = constantize(:input, *type)
    inputs.push [klass, FluQ::DSL::Options.new(&block).to_hash]
  end

  # Declares a handler. The [class, options] pair is only collected here;
  # it is handed to the reactor by #run.
  # @param [Array<Symbol>] type handler type path, e.g. :log, :counter
  # @yield optional options block, evaluated by FluQ::DSL::Options
  def handler(*type, &block)
    klass = constantize(:handler, *type)
    handlers.push [klass, FluQ::DSL::Options.new(&block).to_hash]
  end

  # Evaluates another script in the context of this DSL instance.
  # @param [String] relative path, resolved against the directory of #path
  def import(relative)
    file = path.dirname.join(relative)
    # Pass the file name so errors raised by the script point at the
    # script instead of "(eval)".
    instance_eval(file.read, file.to_s)
  end

  # Evaluates the script, then starts the components.
  # Handlers first, then inputs.
  def run
    # Pass the file name so errors raised by the script point at the
    # script instead of "(eval)".
    instance_eval(path.read, path.to_s)
    handlers.each {|klass, options| reactor.register(klass, options) }
    inputs.each {|klass, options| reactor.listen(klass, options) }
  end

  protected

    # Requires "fluq/<path segments>" and resolves the matching constant
    # below FluQ, e.g. (:handler, :log) requires "fluq/handler/log" and
    # returns FluQ::Handler::Log. Underscored segments are camel-cased.
    def constantize(*path)
      require([:fluq, *path].join('/'))
      names = path.map {|p| p.to_s.split('_').map(&:capitalize).join }
      names.inject(FluQ) {|klass, name| klass.const_get(name) }
    end

end
46
+
47
+ %w'options'.each do |name|
48
+ require "fluq/dsl/#{name}"
49
+ end
@@ -0,0 +1,27 @@
1
# Collects option assignments made inside a DSL block into a Hash.
# Any call to an otherwise undefined method is treated as an assignment
# of the option with that name.
class FluQ::DSL::Options

  # Constructor
  # @yield options assignment, evaluated in the context of the new instance
  def initialize(&block)
    @opts = {}
    instance_eval(&block) if block
  end

  # @return [Hash] options hash
  def to_hash
    @opts
  end

  protected

    # Stores the option `name`:
    #
    # * value and block  -> the value under `name`, plus the block evaluated
    #                       as nested options under `<name>_options`
    # * value only       -> the value (an explicit `false` is kept as
    #                       `false`; it used to be turned into `true`)
    # * block only       -> the block itself
    # * neither (or nil) -> `true`
    def method_missing(name, *args, &block)
      key   = name.to_sym
      value = args[0]

      if value.nil?
        @opts[key] = block || true
      elsif block
        @opts[key] = value
        @opts[:"#{name}_options"] = self.class.new(&block).to_hash
      else
        @opts[key] = value
      end
    end

end
data/lib/fluq/error.rb ADDED
@@ -0,0 +1,2 @@
1
# Error class in the FluQ namespace; a plain StandardError subclass.
class FluQ::Error < StandardError; end
data/lib/fluq/event.rb ADDED
@@ -0,0 +1,55 @@
1
# A single event: a Hash of attribute pairs that additionally carries a
# tag and a UNIX timestamp.
class FluQ::Event < Hash

  attr_reader :tag, :timestamp

  # @param [String] tag the event tag, coerced with #to_s
  # @param [Integer] timestamp the UNIX timestamp, coerced with #to_i
  # @param [Hash] record the attribute pairs; ignored unless it is a Hash
  def initialize(tag = "", timestamp = 0, record = {})
    @tag       = tag.to_s
    @timestamp = timestamp.to_i
    super()
    update(record) if Hash === record
  end

  # @return [Time] the timestamp as UTC time (memoized)
  def time
    @time ||= Time.at(timestamp).utc
  end

  # @return [Array] the [tag, timestamp, record] tuple
  def to_a
    [tag, timestamp, self]
  end

  # Arrays are compared against the #to_a tuple, everything else is
  # compared through Hash#==.
  # @return [Boolean] true if equal
  def ==(other)
    return to_a == other if Array === other

    super
  end
  alias_method :eql?, :==

  # @return [String] tag, timestamp and JSON encoded record, tab-separated
  def to_tsv
    fields = [tag, timestamp, Oj.dump(self)]
    fields.join("\t")
  end

  # @return [String] JSON encoded, tag stored under "=" and timestamp under "@"
  def to_json
    Oj.dump(merge("=" => tag, "@" => timestamp))
  end

  # @return [String] msgpack encoded bytes, tag stored under "=" and
  #   timestamp under "@"
  def to_msgpack
    MessagePack.pack(merge("=" => tag, "@" => timestamp))
  end

  # @return [String] inspection of the tuple, with the record as plain Hash
  def inspect
    plain = Hash.new.update(self)
    [tag, timestamp, plain].inspect
  end

end
data/lib/fluq/feed.rb ADDED
@@ -0,0 +1,6 @@
1
# Namespace for the feed implementations.
module FluQ::Feed; end
3
+
4
+ %w'base msgpack json tsv'.each do |name|
5
+ require "fluq/feed/#{name}"
6
+ end
@@ -0,0 +1,18 @@
1
# Abstract feed: an Enumerable of events on top of a buffer.
# Subclasses provide the actual #each.
class FluQ::Feed::Base
  include Enumerable
  include FluQ::Mixins::Loggable

  # @attr_reader [FluQ::Buffer::Base] buffer the wrapped buffer
  attr_reader :buffer

  # @param [FluQ::Buffer::Base] buffer
  def initialize(buffer)
    @buffer = buffer
  end

  # @abstract enumerator, yields nothing in this base class
  # @yield over a feed of events
  # @yieldparam [FluQ::Event] event
  def each; end
end
@@ -0,0 +1,28 @@
1
# Feed reading one JSON document per buffer line.
class FluQ::Feed::Json < FluQ::Feed::Base

  # Yields an event for every line that decodes to a Hash; other lines
  # are logged and skipped.
  # @see FluQ::Feed::Base#each
  def each
    buffer.drain do |io|
      until (line = io.gets).nil?
        event = to_event(line)
        yield event if event
      end
    end
  end

  private

    # @param [String] line a single line read from the buffer
    # @return [FluQ::Event, nil] the event, or nil when the line cannot be
    #   parsed or does not contain a Hash
    def to_event(line)
      parsed = Oj.load(line)

      # The tag is stored under "=", the timestamp under "@"; both are
      # removed from the record before it becomes the event body.
      return FluQ::Event.new(parsed.delete("="), parsed.delete("@"), parsed) if Hash === parsed

      logger.warn "buffer contained invalid event #{parsed.inspect}"
      nil
    rescue Oj::ParseError
      logger.warn "buffer contained invalid line #{line.inspect}"
      nil
    end

end
@@ -0,0 +1,27 @@
1
# Feed reading a stream of msgpack encoded objects from the buffer.
class FluQ::Feed::Msgpack < FluQ::Feed::Base

  # Yields an event for every unpacked Hash; other objects are logged and
  # skipped.
  # @see FluQ::Feed::Base#each
  def each
    buffer.drain do |io|
      MessagePack::Unpacker.new(io).each do |object|
        event = to_event(object)
        yield event if event
      end
    end
  rescue EOFError
    # An EOFError simply ends the enumeration.
  end

  private

    # @param [Object] hash an unpacked object
    # @return [FluQ::Event, nil] the event, or nil unless given a Hash
    def to_event(hash)
      unless Hash === hash
        logger.warn "buffer contained invalid event #{hash.inspect}"
        return nil
      end

      # The tag is stored under "=", the timestamp under "@"; both are
      # removed from the record before it becomes the event body.
      FluQ::Event.new hash.delete("="), hash.delete("@"), hash
    end

end
@@ -0,0 +1,30 @@
1
# Feed reading one "tag<TAB>timestamp<TAB>json" record per buffer line.
class FluQ::Feed::Tsv < FluQ::Feed::Base

  # Yields an event for every well-formed line; other lines are logged
  # and skipped.
  # @see FluQ::Feed::Base#each
  def each
    buffer.drain do |io|
      while line = io.gets
        event = to_event(line)
        yield event if event
      end
    end
  end

  private

    # @param [String] line a single line read from the buffer
    # @return [FluQ::Event, nil] the event, or nil when the line is
    #   malformed or its payload is not a JSON object
    def to_event(line)
      # Limit the split to three fields so that tab characters inside the
      # JSON payload stay part of the payload.
      tag, timestamp, json = line.split("\t", 3)

      # Fewer than three fields: there is no payload to hand to the parser.
      if json.nil?
        logger.warn "buffer contained invalid line #{line.inspect}"
        return nil
      end

      case hash = Oj.load(json)
      when Hash
        FluQ::Event.new tag, timestamp, hash
      else
        logger.warn "buffer contained invalid event #{[tag, timestamp, hash].inspect}"
        nil
      end
    rescue Oj::ParseError, ArgumentError
      logger.warn "buffer contained invalid line #{line.inspect}"
      nil
    end

end
@@ -0,0 +1,6 @@
1
# Namespace for the handler implementations.
module FluQ::Handler; end
3
+
4
+ %w'base log null'.each do |name|
5
+ require "fluq/handler/#{name}"
6
+ end
@@ -0,0 +1,80 @@
1
+ require 'digest/md5'
2
+
3
# Base class for event handlers. A handler has a name and a tag pattern
# and receives events through #on_events.
class FluQ::Handler::Base
  include FluQ::Mixins::Loggable

  # @return [String] handler type, the downcased last segment of the class name
  def self.type
    @type ||= name.split("::")[-1].downcase
  end

  # @attr_reader [FluQ::Reactor] reactor
  attr_reader :reactor

  # @attr_reader [String] name unique name
  attr_reader :name

  # @attr_reader [Hash] config the defaults merged with the given options
  attr_reader :config

  # @attr_reader [Regexp] pattern
  attr_reader :pattern

  # @param [FluQ::Reactor] reactor
  # @param [Hash] options
  # @option options [String] :name a (unique) handler identifier,
  #   generated from type and pattern when omitted
  # @option options [String, Regexp] :pattern tag pattern to match;
  #   strings may use the wildcards `*`, `?` and `{a,b}`
  # @example
  #
  #   class MyHandler < FluQ::Handler::Base
  #   end
  #   MyHandler.new(reactor, pattern: "visits.*")
  #
  def initialize(reactor, options = {})
    @reactor = reactor
    @config  = defaults.merge(options)
    @name    = config[:name] || generate_name
    @pattern = generate_pattern
  end

  # @param [FluQ::Event] event
  # @return [Boolean] true if the event's tag matches the pattern
  def match?(event)
    !!(pattern =~ event.tag)
  end

  # @param [Array<FluQ::Event>] events
  # @return [Array<FluQ::Event>] matching events
  def select(events)
    events.select(&method(:match?))
  end

  # @abstract callback, called with the event stream
  # @param [Array<FluQ::Event>] events the event stream
  def on_events(events)
  end

  protected

    # Configuration defaults
    def defaults
      { pattern: /./ }
    end

    # @return [String] generated name: the handler type plus a 6 character
    #   suffix derived from the MD5 digest of the configured pattern
    def generate_name
      suffix = [Digest::MD5.digest(config[:pattern].to_s)].pack("m0").tr('+/=lIO0', 'pqrsxyz')[0,6]
      [self.class.type, suffix].join("-")
    end

    # Builds the tag Regexp. A Regexp is used as given; anything else is
    # quoted and its wildcards are translated:
    # `*` => `.*`, `?` => `.`, `{a,b}` => `(?:a|b)`.
    # @return [Regexp]
    def generate_pattern
      return config[:pattern] if Regexp === config[:pattern]

      source = Regexp.quote(config[:pattern])
      source = source.gsub("\\*", ".*").gsub("\\?", ".")
      source = source.gsub(/\\\{(.+?)\\\}/) do
        "(?:#{Regexp.last_match(1).split(",").join("|")})"
      end

      # Anchor to the whole string: ^ and $ only anchor to a line, which
      # lets a multi-line tag match on any single one of its lines.
      Regexp.new "\\A#{source}\\z"
    end

end
@@ -0,0 +1,67 @@
1
# Handler appending events to log files. The target file of an event is
# derived from its tag and its time.
class FluQ::Handler::Log < FluQ::Handler::Base

  # Cache of opened log files, keyed by path.
  # NOTE(review): size and expiry behaviour come from TimedLRU, which is
  # not visible here.
  class FilePool < TimedLRU

    # Returns the cached file for the path; otherwise creates the parent
    # directories, opens the file in "a+" mode and caches it.
    # @param [#to_s] path
    # @return [File]
    def open(path)
      path = path.to_s
      self[path] ||= begin
        FileUtils.mkdir_p File.dirname(path)
        file = File.open(path, "a+")
        file.autoclose = true
        file
      end
    end

  end

  # @attr_reader [FluQ::Handler::Log::FilePool] file pool
  attr_reader :pool

  # @see FluQ::Handler::Base#initialize
  def initialize(*)
    super
    @full_path = FluQ.root.join(config[:path]).to_s.freeze
    @rewrite   = config[:rewrite]
    @convert   = config[:convert]
    @pool      = FilePool.new max_size: config[:cache_max], ttl: config[:cache_ttl]
  end

  # Groups the events by target path and appends each group to its file.
  # @see FluQ::Handler::Base#on_events
  def on_events(events)
    partition(events).each {|path, slice| write(path, slice) }
  end

  protected

    # Configuration defaults:
    # * path      - path template below FluQ.root; "%t" is replaced by the
    #               rewritten tag, the rest is expanded by Time#strftime
    # * rewrite   - converts a tag into a path fragment
    # * convert   - converts an event into the line to write
    # * cache_max - passed to the file pool as max_size
    # * cache_ttl - passed to the file pool as ttl
    def defaults
      super.merge \
        path: "log/raw/%t/%Y%m%d/%H.log",
        rewrite: lambda {|tag| tag.gsub(".", "/") },
        convert: lambda {|event| event.to_tsv },
        cache_max: 100,
        cache_ttl: 300
    end

    # Appends one converted line per event to the file at path. On IOError
    # the file is dropped from the pool and the write is retried, the error
    # is re-raised after the third failed attempt.
    # NOTE: a retry starts over with the first event of the slice.
    def write(path, slice, attempts = 0)
      io = @pool.open(path)
      slice.each do |event|
        io.write "#{@convert.call(event)}\n"
      end
    rescue IOError
      @pool.delete path.to_s
      (attempts += 1) < 3 ? retry : raise
    end

    # @param [Array<FluQ::Event>] events
    # @return [Hash<String, Array<FluQ::Event>>] events grouped by target path
    def partition(events)
      events.group_by do |event|
        # Escape "%" so that a tag cannot inject strftime directives into
        # the template; strftime turns "%%" back into a literal "%".
        tag = @rewrite.call(event.tag).gsub("%", "%%")
        # Block form inserts the tag verbatim, a replacement string would
        # interpret backslash sequences in the tag as back-references.
        event.time.strftime(@full_path.gsub("%t") { tag })
      end
    end

end
@@ -0,0 +1,4 @@
1
# Handler that does nothing with the events it receives.
class FluQ::Handler::Null < FluQ::Handler::Base
  # @see FluQ::Handler::Base#on_events
  def on_events(events); end
end
data/lib/fluq/input.rb ADDED
@@ -0,0 +1,6 @@
1
# Namespace for the input implementations.
module FluQ::Input; end
3
+
4
+ %w'base socket'.each do |name|
5
+ require "fluq/input/#{name}"
6
+ end
@@ -0,0 +1,59 @@
1
# Base class for inputs. An input creates buffers and, on flush, turns
# their content into events that are passed on to the reactor.
class FluQ::Input::Base
  include FluQ::Mixins::Loggable

  # @attr_reader [FluQ::Reactor] reactor reference
  attr_reader :reactor

  # @attr_reader [Hash] config the defaults merged with the given options
  attr_reader :config

  # @param [FluQ::Reactor] reactor
  # @param [Hash] options various configuration options
  def initialize(reactor, options = {})
    super()
    @reactor = reactor
    @config  = defaults.merge(options)
  end

  # @return [String] descriptive name, the downcased last segment of the
  #   class name
  def name
    @name ||= self.class.name.split("::").last.downcase
  end

  # Start the input; a no-op in this base class
  def run; end

  # Creates a new buffer object from the configured buffer class
  # @return [FluQ::Buffer::Base] a new buffer
  def new_buffer
    buffer_klass.new(config[:buffer_options])
  end

  # Feeds the buffer content to the reactor in slices of up to 10,000
  # events. Errors are logged through `logger.crash`; the buffer is closed
  # in any case.
  # @param [FluQ::Buffer::Base] buffer
  def flush!(buffer)
    feed = feed_klass.new(buffer)
    feed.each_slice(10_000) {|events| reactor.process(events) }
  rescue => error
    logger.crash "#{self.class.name} failure: #{error.message} (#{error.class.name})", error
  ensure
    buffer.close if buffer
  end

  protected

    # @return [Class] buffer class, looked up below FluQ::Buffer from config[:buffer]
    def buffer_klass
      @buffer_klass ||= begin
        const_name = config[:buffer].to_s.capitalize
        FluQ::Buffer.const_get(const_name)
      end
    end

    # @return [Class] feed class, looked up below FluQ::Feed from config[:feed]
    def feed_klass
      @feed_klass ||= begin
        const_name = config[:feed].to_s.capitalize
        FluQ::Feed.const_get(const_name)
      end
    end

    # Configuration defaults
    def defaults
      {
        buffer: "file",
        feed: "msgpack",
        buffer_options: {}
      }
    end

end