druiddb 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 39e6d627318a91e94886eb34b4d05b349d79153f
+   data.tar.gz: b6bc938cd4175a50e2aaa630123a03faf5119660
+ SHA512:
+   metadata.gz: 202fd72edd5740aa09727d7eec183cc79ddaf2cfbd9bc529e7d908252039c78354c89ba7a58bb19f0fc550972f866d39abe9f8291fbc53bb956ec5260137a793
+   data.tar.gz: fb94c7c1e92b3a653d76851c420f8368b27d6a6dfd63402cf7bb8cca6d5e2247d028dc9fde074b0b5c943911d99e8c93bc330616fbfb487ea3691a18238fe552
data/.gitignore ADDED
@@ -0,0 +1,14 @@
+ /.bundle/
+ /.yardoc
+ /Gemfile.lock
+ /_yardoc/
+ /coverage/
+ /doc/
+ /pkg/
+ /spec/reports/
+ /tmp/
+ /example
+ zookeeper.out
+ jruby-druid.log
+ .ruby-version
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,2 @@
+ source 'https://rubygems.org'
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2016 Andre LeBlanc
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1 @@
+ # ruby-druid
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ require "bundler/gem_tasks"
+ require "rspec/core/rake_task"
+
+ RSpec::Core::RakeTask.new(:spec)
+
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,7 @@
+ #!/usr/bin/env ruby
+
+ require "bundler/setup"
+ require "irb"
+ require "druiddb"
+
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
+ #!/usr/bin/env bash
+ set -euo pipefail
+ IFS=$'\n\t'
+ set -vx
+
+ bundle install
+
+ # Do any other automated setup that you need to do here
data/druiddb.gemspec ADDED
@@ -0,0 +1,28 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'druid/version'
+
+ Gem::Specification.new do |spec|
+   spec.name = "druiddb"
+   spec.version = RubyDruid::VERSION
+   spec.authors = ["Andre LeBlanc"]
+   spec.email = ["andre.leblanc88@gmail.com"]
+
+   spec.summary = 'Ruby adapter for Druid.'
+   spec.description = 'Ruby adapter for Druid that allows reads and writes using the Tranquility Kafka API.'
+   spec.homepage = "https://github.com/andremleblanc/ruby-druid"
+   spec.license = "MIT"
+
+   spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+   spec.bindir = "exe"
+   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+   spec.require_paths = ["lib"]
+
+   spec.add_dependency "activesupport", '~> 5.0'
+   spec.add_dependency "ruby-kafka", '~> 0.3'
+   spec.add_dependency "zk", '~> 1.9'
+
+   spec.add_development_dependency "bundler", '~> 1.7'
+   spec.add_development_dependency "rake", '~> 10.0'
+ end
data/lib/druid/README.md ADDED
@@ -0,0 +1,20 @@
+ # Druid
+ This module contains all logic associated with Druid.
+
+ ## Node
+ The `Node` classes represent Druid nodes and manage connections with them. They
+ also provide the methods that are exposed natively by the Druid REST API.
+
+ ## Query
+ The `Query` module provides a way for the `Druid::Client` to inherit the methods
+ from the `Node` classes. Additionally, the `Query` module classes provide some
+ methods not found natively in the Druid REST API.
+
+ ## Writer
+ The `Writer` classes use the Tranquility Kafka API to communicate with Druid
+ nodes and allow writing.
+
+ ## Errors
+ **ClientError:** Indicates a failure within the ruby-druid adapter.
+ **ConnectionError:** Indicates a failed request to Druid.
+ **QueryError:** Indicates a malformed query.
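Taken together, these pieces give the adapter a small surface. A minimal usage sketch, assuming a local ZooKeeper and a running Druid cluster; the datasource name and query options are illustrative, not part of the package:

```ruby
require 'druiddb'

client = Druid::Client.new(zookeeper: 'localhost:2181')

begin
  # Client#query delegates to the Broker node via Druid::Query (lib/druid/query.rb).
  client.query(
    queryType:    'timeseries',
    dataSource:   'pageviews', # hypothetical datasource
    granularity:  'hour',
    intervals:    ['2017-01-01T00:00:00Z/2017-01-02T00:00:00Z'],
    aggregations: [{ type: 'longSum', name: 'count', fieldName: 'count' }]
  )
rescue Druid::ConnectionError => e
  warn "request to Druid failed: #{e.message}"
rescue Druid::QueryError
  warn 'malformed query'
end
```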
data/lib/druid/client.rb ADDED
@@ -0,0 +1,22 @@
+ module Druid
+   class Client
+     include Druid::Queries::Core
+     include Druid::Queries::Task
+
+     attr_reader :broker,
+                 :config,
+                 :coordinator,
+                 :overlord,
+                 :writer,
+                 :zk
+
+     def initialize(options = {})
+       @config = Druid::Configuration.new(options)
+       @zk = Druid::ZK.new(config)
+       @broker = Druid::Node::Broker.new(config, zk)
+       @coordinator = Druid::Node::Coordinator.new(config, zk)
+       @overlord = Druid::Node::Overlord.new(config, zk)
+       @writer = Druid::Writer.new(config, zk)
+     end
+   end
+ end
data/lib/druid/configuration.rb ADDED
@@ -0,0 +1,51 @@
+ module Druid
+   class Configuration
+     DISCOVERY_PATH = '/druid/discovery'.freeze
+     INDEX_SERVICE = 'druid/overlord'.freeze
+     KAFKA_BROKER_PATH = '/brokers/ids'.freeze
+     LOG_LEVEL = :error
+     ROLLUP_GRANULARITY = :minute
+     STRONG_DELETE = false # Not recommended to be true in production.
+     TUNING_GRANULARITY = :day
+     TUNING_WINDOW = 'PT1H'.freeze
+     WAIT_TIME = 20 # Seconds
+     ZOOKEEPER = 'localhost:2181'.freeze
+
+     attr_reader :discovery_path,
+                 :index_service,
+                 :kafka_broker_path,
+                 :log_level,
+                 :rollup_granularity,
+                 :strong_delete,
+                 :tuning_granularity,
+                 :tuning_window,
+                 :wait_time,
+                 :zookeeper
+
+
+     def initialize(opts = {})
+       @discovery_path = opts[:discovery_path] || DISCOVERY_PATH
+       @index_service = opts[:index_service] || INDEX_SERVICE
+       @kafka_broker_path = opts[:kafka_broker_path] || KAFKA_BROKER_PATH
+       @log_level = opts[:log_level] || LOG_LEVEL
+       @rollup_granularity = rollup_granularity_string(opts[:rollup_granularity])
+       @strong_delete = opts[:strong_delete] || STRONG_DELETE
+       @tuning_granularity = tuning_granularity_string(opts[:tuning_granularity])
+       @tuning_window = opts[:tuning_window] || TUNING_WINDOW
+       @wait_time = opts[:wait_time] || WAIT_TIME
+       @zookeeper = opts[:zookeeper] || ZOOKEEPER
+     end
+
+     private
+
+     def rollup_granularity_string(input)
+       output_string = input || ROLLUP_GRANULARITY
+       output_string.to_s.upcase.freeze
+     end
+
+     def tuning_granularity_string(input)
+       output_string = input || TUNING_GRANULARITY
+       output_string.to_s.upcase.freeze
+     end
+   end
+ end
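A short sketch of how the defaults and overrides interact; the values are illustrative:

```ruby
config = Druid::Configuration.new(
  zookeeper: 'zk1.example.com:2181', # overrides ZOOKEEPER
  rollup_granularity: :hour          # granularity symbols are upcased to strings
)

config.zookeeper          #=> "zk1.example.com:2181"
config.rollup_granularity #=> "HOUR"
config.tuning_window      #=> "PT1H" (default)
```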
data/lib/druid/connection.rb ADDED
@@ -0,0 +1,70 @@
+ # Based on: http://danknox.github.io/2013/01/27/using-rubys-native-nethttp-library/
+ require 'net/http'
+
+ module Druid
+   class Connection
+     CONTENT_TYPE = 'application/json'.freeze
+     VERB_MAP = {
+       :get => ::Net::HTTP::Get,
+       :post => ::Net::HTTP::Post,
+       :put => ::Net::HTTP::Put,
+       :delete => ::Net::HTTP::Delete
+     }
+
+     attr_reader :http
+
+     def initialize(endpoint)
+       if endpoint.is_a? String
+         uri = URI.parse(endpoint)
+         host, port = uri.host, uri.port
+       else
+         host, port = endpoint.values_at(:host, :port)
+       end
+
+       @http = ::Net::HTTP.new(host, port)
+     end
+
+     def get(path, params = {})
+       request :get, path, params
+     end
+
+     def post(path, params = {})
+       request :post, path, params
+     end
+
+     def put(path, params = {})
+       request :put, path, params
+     end
+
+     def delete(path, params = {})
+       request :delete, path, params
+     end
+
+     private
+
+     def encode_path_params(path, params)
+       encoded = URI.encode_www_form(params)
+       [path, encoded].join("?")
+     end
+
+     def request(method, path, params)
+       case method
+       when :get
+         full_path = encode_path_params(path, params)
+         request = VERB_MAP[method].new(full_path)
+       else
+         request = VERB_MAP[method].new(path)
+         request.body = params.to_json
+       end
+
+       request.content_type = CONTENT_TYPE
+       begin
+         response = http.request(request)
+       rescue Timeout::Error, *Druid::NET_HTTP_EXCEPTIONS => e
+         raise ConnectionError, e.message
+       end
+
+       response
+     end
+   end
+ end
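A usage sketch for this wrapper, assuming a Druid broker on localhost:8082; the paths are standard Druid endpoints, but the host and port are illustrative:

```ruby
conn = Druid::Connection.new('http://localhost:8082')

# GET params are URL-encoded into the query string, i.e. this issues
# GET /druid/v2/datasources?full=true
response = conn.get('/druid/v2/datasources', full: true)
puts response.body if response.code.to_i == 200

# Every other verb serializes params as a JSON request body instead.
conn.post('/druid/v2', queryType: 'dataSourceMetadata', dataSource: 'pageviews')
```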
data/lib/druid/errors.rb ADDED
@@ -0,0 +1,22 @@
+ module Druid
+   class Error < StandardError; end
+   class ClientError < Error; end
+   class ConnectionError < Error; end
+   class QueryError < Error; end
+   class ValidationError < Error; end
+
+   # Adapted from: https://github.com/lostisland/faraday/blob/master/lib/faraday/adapter/net_http.rb
+   NET_HTTP_EXCEPTIONS = [
+     EOFError,
+     Errno::ECONNABORTED,
+     Errno::ECONNREFUSED,
+     Errno::ECONNRESET,
+     Errno::EHOSTUNREACH,
+     Errno::EINVAL,
+     Errno::ENETUNREACH,
+     Net::HTTPBadResponse,
+     Net::HTTPHeaderSyntaxError,
+     Net::ProtocolError,
+     SocketError
+   ]
+ end
data/lib/druid/node/broker.rb ADDED
@@ -0,0 +1,35 @@
+ module Druid
+   module Node
+     class Broker
+       QUERY_PATH = '/druid/v2'.freeze
+
+       attr_reader :config, :zk
+       def initialize(config, zk)
+         @config = config
+         @zk = zk
+       end
+
+       # TODO: Would caching connections be beneficial?
+       def connection
+         broker = zk.registry["#{config.discovery_path}/druid:broker"].first
+         raise Druid::ConnectionError, 'no druid brokers available' if broker.nil?
+         zk.registry["#{config.discovery_path}/druid:broker"].rotate! # round-robin load balancing
+         Druid::Connection.new(host: broker[:host], port: broker[:port])
+       end
+
+       def query(query_object)
+         begin
+           response = connection.post(QUERY_PATH, query_object)
+         rescue Druid::ConnectionError
+           # TODO: This retry strategy is crude; make it better.
+           (zk.registry["#{config.discovery_path}/druid:broker"].size - 1).times do
+             response = connection.post(QUERY_PATH, query_object)
+             break if response.code.to_i == 200
+           end
+         end
+         raise QueryError unless response && response.code.to_i == 200
+         JSON.parse(response.body)
+       end
+     end
+   end
+ end
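The round-robin scheme works because `#connection` always reads the first registry entry and then `rotate!` mutates the list in place. A sketch of the registry state across one call; hosts and ports are illustrative:

```ruby
client = Druid::Client.new
client.zk.registry['/druid/discovery/druid:broker']
#=> [{ host: 'druid1', port: 8082 }, { host: 'druid2', port: 8082 }]

client.broker.connection # connects to druid1, then rotate! sends it to the back
client.zk.registry['/druid/discovery/druid:broker']
#=> [{ host: 'druid2', port: 8082 }, { host: 'druid1', port: 8082 }]
```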
data/lib/druid/node/coordinator.rb ADDED
@@ -0,0 +1,117 @@
+ module Druid
+   module Node
+     class Coordinator
+       DATASOURCES_PATH = '/druid/coordinator/v1/datasources/'.freeze
+
+       attr_reader :config, :zk
+       def initialize(config, zk)
+         @config = config
+         @zk = zk
+       end
+
+       # TODO: DRY; copy/paste from broker
+       def connection
+         coordinator = zk.registry["#{config.discovery_path}/druid:coordinator"].first
+         raise Druid::ConnectionError, 'no druid coordinators available' if coordinator.nil?
+         zk.registry["#{config.discovery_path}/druid:coordinator"].rotate! # round-robin load balancing
+         Druid::Connection.new(host: coordinator[:host], port: coordinator[:port])
+       end
+
+       def datasource_info(datasource_name)
+         response = connection.get(DATASOURCES_PATH + datasource_name.to_s, full: true)
+         raise ConnectionError, 'Unable to retrieve datasource information.' unless response.code.to_i == 200
+         JSON.parse(response.body)
+       end
+
+       def disable_datasource(datasource_name)
+         # response = connection.delete(DATASOURCES_PATH + datasource_name.to_s)
+         # raise ConnectionError, 'Unable to disable datasource' unless response.code.to_i == 200
+         # return true if response.code.to_i == 200
+
+         # This is a workaround for https://github.com/druid-io/druid/issues/3154
+         disable_segments(datasource_name)
+         bounded_wait_for_segments_disable(datasource_name)
+         true
+       end
+
+       # TODO: This should either be private or moved to datasource
+       def datasource_enabled?(datasource_name)
+         list_datasources.include? datasource_name
+       end
+
+       # TODO: This should either be private or moved to datasource
+       def datasource_has_segments?(datasource_name)
+         list_segments(datasource_name).any?
+       end
+
+       def disable_segment(datasource_name, segment)
+         response = connection.delete(DATASOURCES_PATH + datasource_name + '/segments/' + segment)
+         raise ConnectionError, "Unable to disable #{segment}" unless response.code.to_i == 200
+         true
+       end
+
+       # TODO: This should either be private or moved to datasource
+       def disable_segments(datasource_name)
+         segments = list_segments(datasource_name)
+         segments.each{ |segment| disable_segment(datasource_name, segment) }
+       end
+
+       def issue_kill_task(datasource_name, interval)
+         response = connection.delete(DATASOURCES_PATH + datasource_name + '/intervals/' + interval)
+         raise ConnectionError, 'Unable to issue kill task.' unless response.code.to_i == 200
+         true
+       end
+
+       def list_datasources(url_params = {})
+         response = connection.get(DATASOURCES_PATH, url_params)
+         JSON.parse(response.body) if response.code.to_i == 200
+       end
+
+       def list_segments(datasource_name)
+         response = connection.get(DATASOURCES_PATH + datasource_name + '/segments', full: true)
+         case response.code.to_i
+         when 200
+           JSON.parse(response.body).map{ |segment| segment['identifier'] }
+         when 204
+           []
+         else
+           raise ConnectionError, "Unable to list segments for #{datasource_name}"
+         end
+       end
+
+       private
+
+       def bounded_wait_for_disable(datasource_name)
+         condition = datasource_enabled?(datasource_name)
+         attempts = 0
+         max = 10
+
+         while(condition) do
+           attempts += 1
+           sleep 1
+           condition = datasource_enabled?(datasource_name)
+           break if attempts >= max
+         end
+
+         raise ClientError, 'Datasource should be disabled, but is still enabled.' if condition
+         true
+       end
+
+       def bounded_wait_for_segments_disable(datasource_name)
+         condition = datasource_has_segments?(datasource_name)
+         attempts = 0
+         max = 60
+
+         while(condition) do
+           attempts += 1
+           sleep 1
+           condition = datasource_has_segments?(datasource_name)
+           break if attempts >= max
+         end
+
+         raise ClientError, 'Segments should be disabled, but are still enabled.' if condition
+         true
+       end
+     end
+   end
+ end
data/lib/druid/node/overlord.rb ADDED
@@ -0,0 +1,60 @@
+ module Druid
+   module Node
+     class Overlord
+       INDEXER_PATH = '/druid/indexer/v1/'.freeze
+       RUNNING_TASKS_PATH = (INDEXER_PATH + 'runningTasks').freeze
+       TASK_PATH = (INDEXER_PATH + 'task/').freeze
+
+       attr_reader :config, :zk
+       def initialize(config, zk)
+         @config = config
+         @zk = zk
+       end
+
+       # TODO: DRY; copy/paste
+       def connection
+         overlord = zk.registry["#{config.discovery_path}/druid:overlord"].first
+         raise Druid::ConnectionError, 'no druid overlords available' if overlord.nil?
+         zk.registry["#{config.discovery_path}/druid:overlord"].rotate! # round-robin load balancing
+         Druid::Connection.new(host: overlord[:host], port: overlord[:port])
+       end
+
+       def running_tasks(datasource_name = nil)
+         response = connection.get(RUNNING_TASKS_PATH)
+         raise ConnectionError, 'Could not retrieve running tasks' unless response.code.to_i == 200
+         tasks = JSON.parse(response.body).map{ |task| task['id'] }
+         tasks.select!{ |task| task.include? datasource_name } if datasource_name
+         tasks
+       end
+
+       def shutdown_task(task)
+         response = connection.post(TASK_PATH + task + '/shutdown')
+         raise ConnectionError, 'Unable to shutdown task' unless response.code.to_i == 200
+         bounded_wait_for_shutdown(task)
+       end
+
+       def shutdown_tasks(datasource_name = nil)
+         tasks = running_tasks(datasource_name)
+         tasks.each{ |task| shutdown_task(task) }
+       end
+
+       private
+
+       def bounded_wait_for_shutdown(task)
+         condition = !(running_tasks.include? task)
+         attempts = 0
+         max = 10
+
+         until(condition) do
+           attempts += 1
+           sleep 1
+           condition = !(running_tasks.include? task)
+           break if attempts >= max
+         end
+
+         raise ClientError, 'Task did not shutdown.' unless condition
+         true
+       end
+     end
+   end
+ end
data/lib/druid/queries/core.rb ADDED
@@ -0,0 +1,11 @@
+ module Druid
+   module Queries
+     module Core
+       delegate :write_point, to: :writer
+
+       def query(opts)
+         Druid::Query.create(opts.merge(broker: broker))
+       end
+     end
+   end
+ end
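`delegate` here is ActiveSupport's `Module#delegate`, so including this module gives `Druid::Client` a `write_point` that forwards to its `writer`. The two calls below are equivalent; the topic and payload are illustrative:

```ruby
client = Druid::Client.new
client.write_point('events', '{"value":1}')        # via the generated delegator
client.writer.write_point('events', '{"value":1}') # direct call
```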
data/lib/druid/queries/task.rb ADDED
@@ -0,0 +1,7 @@
+ module Druid
+   module Queries
+     module Task
+       delegate :shutdown_tasks, to: :overlord
+     end
+   end
+ end
data/lib/druid/query.rb ADDED
@@ -0,0 +1,182 @@
+ module Druid
+   class Query
+     attr_reader :aggregations,
+                 :broker,
+                 :dimensions,
+                 :end_interval,
+                 :fill_value,
+                 :granularity,
+                 :query_opts,
+                 :query_type,
+                 :range,
+                 :result_key,
+                 :start_interval
+
+     def initialize(opts)
+       @aggregations = opts[:aggregations].map{ |agg| agg[:name] }
+       @broker = opts[:broker]
+       @dimensions = opts[:dimensions]
+       @fill_value = opts[:fill_value]
+       @granularity = opts[:granularity]
+       @range = parse_range(opts[:intervals])
+       @query_type = opts[:queryType]
+       @end_interval = calculate_end_interval
+       @start_interval = calculate_start_interval
+       @query_opts = opts_for_query(opts)
+     end
+
+     def execute
+       result = broker.query(query_opts)
+       fill_query_results(result)
+     end
+
+     private
+
+     # TODO: Can this be made smarter? Prefer to avoid case statements.
+     # Cases found here: http://druid.io/docs/latest/querying/granularities.html
+     def advance_interval(time)
+       case granularity
+       when 'second'
+         time.advance(seconds: 1)
+       when 'minute'
+         time.advance(minutes: 1)
+       when 'fifteen_minute'
+         time.advance(minutes: 15)
+       when 'thirty_minute'
+         time.advance(minutes: 30)
+       when 'hour'
+         time.advance(hours: 1)
+       when 'day'
+         time.advance(days: 1)
+       when 'week'
+         time.advance(weeks: 1)
+       when 'month'
+         time.advance(months: 1)
+       when 'quarter'
+         time.advance(months: 3)
+       when 'year'
+         time.advance(years: 1)
+       else
+         raise Druid::QueryError, 'Unsupported granularity'
+       end
+     end
+
+     def calculate_end_interval
+       iso8601_duration_end_interval(range)
+     end
+
+     def calculate_start_interval
+       time = iso8601_duration_start_interval(range)
+       start_of_interval(time)
+     end
+
+     def fill_empty_intervals(points, opts = {})
+       interval = start_interval
+       result = []
+
+       while interval <= end_interval do
+         # TODO:
+         # This searches the points every time; it could be more performant if
+         # we tracked the 'current point' in the points and only compared the
+         # current point's timestamp.
+         point = find_or_create_point(interval, points)
+         aggregations.each do |aggregation|
+           point[result_key][aggregation] = fill_value if point[result_key][aggregation].blank?
+           point[result_key].merge!(opts)
+         end
+         result << point
+         interval = advance_interval(interval)
+       end
+
+       result
+     end
+
+     # NOTE:
+     # This responsibility really lies in Druid, but until the feature works
+     # reliably in Druid, this serves the purpose.
+     # https://github.com/druid-io/druid/issues/2106
+     def fill_query_results(query_result)
+       return query_result unless query_result.present? && fill_value.present?
+       parse_result_key(query_result.first)
+
+       # TODO: handle multi-dimensional group by
+       if group_by?
+         result = []
+         dimension_key = dimensions.first
+         groups = query_result.group_by{ |point| point[result_key][dimension_key] }
+         groups.each do |dimension_value, dimension_points|
+           result += fill_empty_intervals(dimension_points, { dimension_key => dimension_value })
+         end
+         result
+       else
+         fill_empty_intervals(query_result)
+       end
+     end
+
+     def find_or_create_point(interval, points)
+       point = points.find{ |pt| pt['timestamp'].to_s.to_time == interval.to_time }
+       point.present? ? point : { 'timestamp' => interval.iso8601(3), result_key => {} }
+     end
+
+     def group_by?
+       query_type == 'groupBy'
+     end
+
+     def iso8601_duration_start_interval(duration)
+       duration.split('/').first.to_time.utc
+     end
+
+     def iso8601_duration_end_interval(duration)
+       duration.split('/').last.to_time.utc
+     end
+
+     def opts_for_query(opts)
+       opts.except(:fill_value, :broker)
+     end
+
+     def parse_range(range)
+       range.is_a?(Array) ? range.first : range
+     end
+
+     def parse_result_key(point)
+       @result_key = point['event'].present? ? 'event' : 'result'
+     end
+
+     # TODO: Can this be made smarter? Prefer to avoid case statements.
+     # Cases found here: http://druid.io/docs/latest/querying/granularities.html
+     def start_of_interval(time)
+       case granularity
+       when 'second'
+         time.change(usec: 0)
+       when 'minute'
+         time.beginning_of_minute
+       when 'fifteen_minute'
+         first_fifteen = [45, 30, 15, 0].detect{ |m| m <= time.min }
+         time.change(min: first_fifteen)
+       when 'thirty_minute'
+         first_thirty = [30, 0].detect{ |m| m <= time.min }
+         time.change(min: first_thirty)
+       when 'hour'
+         time.beginning_of_hour
+       when 'day'
+         time.beginning_of_day
+       when 'week'
+         time.beginning_of_week
+       when 'month'
+         time.beginning_of_month
+       when 'quarter'
+         time.beginning_of_quarter
+       when 'year'
+         time.beginning_of_year
+       else
+         time
+       end
+     end
+
+     class << self
+       def create(opts)
+         new(opts).execute
+       end
+     end
+   end
+ end
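An illustrative `groupBy` invocation. `:fill_value` and `:broker` are consumed by this class (and stripped by `opts_for_query`); everything else is passed to Druid unchanged. The datasource, dimension, and interval are hypothetical:

```ruby
Druid::Query.create(
  broker:       client.broker,
  queryType:    'groupBy',
  dataSource:   'pageviews',
  granularity:  'day',
  dimensions:   ['country'],
  intervals:    ['2017-01-01/2017-01-08'],
  aggregations: [{ type: 'longSum', name: 'count', fieldName: 'count' }],
  fill_value:   0 # pad missing intervals client-side
)
```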
data/lib/druid/version.rb ADDED
@@ -0,0 +1,3 @@
+ module RubyDruid
+   VERSION = '1.0.0'
+ end
data/lib/druid/writer.rb ADDED
@@ -0,0 +1,47 @@
+ # TODO: There seems to be a delay between shutting down Kafka and ZK updating.
+ module Druid
+   class Writer
+     attr_reader :config, :producer, :zk
+     def initialize(config, zk)
+       @config = config
+       @zk = zk
+       init_producer
+       zk.register_listener(self, :handle_kafka_state_change)
+     end
+
+     def write_point(datasource, datapoint)
+       raise Druid::ConnectionError, 'no kafka brokers available' if producer.nil?
+       producer.produce(datapoint, topic: datasource)
+     end
+
+     private
+
+     def broker_list
+       zk.registry["/brokers/ids"].map{ |instance| "#{instance[:host]}:#{instance[:port]}" }.join(',')
+     end
+
+     def handle_kafka_state_change(service)
+       if service == config.kafka_broker_path
+         producer.shutdown if producer
+         init_producer
+       end
+     end
+
+     def init_producer
+       producer_options = {
+         seed_brokers: broker_list,
+         client_id: "ruby-druid"
+       }
+
+       if broker_list.present?
+         kafka = Kafka.new(producer_options)
+         producer = kafka.async_producer(delivery_threshold: 100, delivery_interval: 10)
+         producer.deliver_messages
+       else
+         producer = nil
+       end
+
+       @producer = producer
+     end
+   end
+ end
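A write sketch, assuming Kafka brokers are registered in ZooKeeper and Tranquility Kafka is consuming the topic. The topic and fields are illustrative, and serializing the point with `to_json` is an assumption here (ruby-kafka producers expect a string payload; the package passes `datapoint` through unchanged):

```ruby
require 'druiddb'

client = Druid::Client.new(zookeeper: 'localhost:2181')
client.write_point('pageviews', {
  timestamp: Time.now.utc.iso8601,
  country:   'US',
  count:     1
}.to_json) # assumption: pre-serialize the point for the Kafka producer
```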
data/lib/druid/zk.rb ADDED
@@ -0,0 +1,63 @@
+ module Druid
+   class ZK
+     attr_accessor :registry
+     attr_reader :client, :config, :listeners
+
+     # TODO: Test and handle ZK partitions
+     def initialize(config)
+       @client = ::ZK.new(config.zookeeper)
+       @config = config
+       @listeners = []
+       @registry = {}
+       register
+     end
+
+     def register_listener(object, method)
+       listeners << ->(*args) { object.send(method, *args) }
+     end
+
+     private
+
+     def announce(service)
+       # puts "announcing #{service}"
+       listeners.each { |listener| listener.call(service) }
+     end
+
+     def register
+       register_service("#{config.discovery_path}/druid:broker")
+       register_service("#{config.discovery_path}/druid:coordinator")
+       register_service("#{config.discovery_path}/druid:overlord")
+       register_service(config.kafka_broker_path)
+     end
+
+     def register_service(service)
+       # puts "registering #{service}"
+       # TODO: Thread safety; lock this registry key
+       subscribe_to_service(service)
+       renew_service_instances(service)
+     end
+
+     def renew_service_instances(service)
+       # puts "activating registered subscriptions on #{service}"
+       instances = client.children(service, watch: true)
+
+       # puts "emptying #{service} from registry"
+       registry[service] = []
+       instances.each do |instance|
+         data = JSON.parse(client.get("#{service}/#{instance}").first)
+         host = data['address'] || data['host']
+         port = data['port']
+         # puts "adding #{host}:#{port} to registry for #{service}"
+         registry[service] << { host: host, port: port }
+       end
+     end
+
+     def subscribe_to_service(service)
+       client.register(service) do |event|
+         # puts "watched event for #{service} detected"
+         renew_service_instances(event.path)
+         announce(event.path)
+       end
+     end
+   end
+ end
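After `#register` runs, the registry maps each watched path to the instances currently known to ZooKeeper, and the ZK watches keep it current. A sketch of its shape with the default paths; hosts and ports are illustrative:

```ruby
client.zk.registry
#=> {
#     "/druid/discovery/druid:broker"      => [{ host: "10.0.0.5", port: 8082 }],
#     "/druid/discovery/druid:coordinator" => [{ host: "10.0.0.6", port: 8081 }],
#     "/druid/discovery/druid:overlord"    => [{ host: "10.0.0.6", port: 8090 }],
#     "/brokers/ids"                       => [{ host: "10.0.0.7", port: 9092 }]
#   }
```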
data/lib/druiddb.rb ADDED
@@ -0,0 +1,21 @@
+ require "active_support/all"
+ require "ruby-kafka"
+ require "json"
+ require "zk"
+
+ require "druid/configuration"
+ require "druid/connection"
+ require "druid/errors"
+ require "druid/query"
+ require "druid/version"
+ require "druid/zk"
+
+ require "druid/node/broker"
+ require "druid/node/coordinator"
+ require "druid/node/overlord"
+
+ require "druid/queries/core"
+ require "druid/queries/task"
+
+ require "druid/writer"
+ require "druid/client"
metadata ADDED
@@ -0,0 +1,138 @@
+ --- !ruby/object:Gem::Specification
+ name: druiddb
+ version: !ruby/object:Gem::Version
+   version: 1.0.0
+ platform: ruby
+ authors:
+ - Andre LeBlanc
+ autorequire:
+ bindir: exe
+ cert_chain: []
+ date: 2017-07-07 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: activesupport
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.0'
+ - !ruby/object:Gem::Dependency
+   name: ruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0.3'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0.3'
+ - !ruby/object:Gem::Dependency
+   name: zk
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.9'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.9'
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.7'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.7'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '10.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '10.0'
+ description: Ruby adapter for Druid that allows reads and writes using the Tranquility
+   Kafka API.
+ email:
+ - andre.leblanc88@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - Gemfile
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - bin/console
+ - bin/setup
+ - druiddb.gemspec
+ - lib/druid/README.md
+ - lib/druid/client.rb
+ - lib/druid/configuration.rb
+ - lib/druid/connection.rb
+ - lib/druid/errors.rb
+ - lib/druid/node/broker.rb
+ - lib/druid/node/coordinator.rb
+ - lib/druid/node/overlord.rb
+ - lib/druid/queries/core.rb
+ - lib/druid/queries/task.rb
+ - lib/druid/query.rb
+ - lib/druid/version.rb
+ - lib/druid/writer.rb
+ - lib/druid/zk.rb
+ - lib/druiddb.rb
+ homepage: https://github.com/andremleblanc/ruby-druid
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.6.12
+ signing_key:
+ specification_version: 4
+ summary: Ruby adapter for Druid.
+ test_files: []