druiddb 1.0.0
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +21 -0
- data/README.md +1 -0
- data/Rakefile +6 -0
- data/bin/console +7 -0
- data/bin/setup +8 -0
- data/druiddb.gemspec +28 -0
- data/lib/druid/README.md +20 -0
- data/lib/druid/client.rb +22 -0
- data/lib/druid/configuration.rb +51 -0
- data/lib/druid/connection.rb +70 -0
- data/lib/druid/errors.rb +22 -0
- data/lib/druid/node/broker.rb +35 -0
- data/lib/druid/node/coordinator.rb +117 -0
- data/lib/druid/node/overlord.rb +60 -0
- data/lib/druid/queries/core.rb +11 -0
- data/lib/druid/queries/task.rb +7 -0
- data/lib/druid/query.rb +182 -0
- data/lib/druid/version.rb +3 -0
- data/lib/druid/writer.rb +47 -0
- data/lib/druid/zk.rb +63 -0
- data/lib/druiddb.rb +21 -0
- metadata +138 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 39e6d627318a91e94886eb34b4d05b349d79153f
+  data.tar.gz: b6bc938cd4175a50e2aaa630123a03faf5119660
+SHA512:
+  metadata.gz: 202fd72edd5740aa09727d7eec183cc79ddaf2cfbd9bc529e7d908252039c78354c89ba7a58bb19f0fc550972f866d39abe9f8291fbc53bb956ec5260137a793
+  data.tar.gz: fb94c7c1e92b3a653d76851c420f8368b27d6a6dfd63402cf7bb8cca6d5e2247d028dc9fde074b0b5c943911d99e8c93bc330616fbfb487ea3691a18238fe552
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 Andre LeBlanc
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1 @@
+# ruby-druid
data/Rakefile
ADDED
data/bin/console
ADDED
data/bin/setup
ADDED
data/druiddb.gemspec
ADDED
@@ -0,0 +1,28 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'druid/version'
+
+Gem::Specification.new do |spec|
+  spec.name = "druiddb"
+  spec.version = RubyDruid::VERSION
+  spec.authors = ["Andre LeBlanc"]
+  spec.email = ["andre.leblanc88@gmail.com"]
+
+  spec.summary = 'Ruby adapter for Druid.'
+  spec.description = 'Ruby adapter for Druid that allows reads and writes using the Tranquility Kafka API.'
+  spec.homepage = "https://github.com/andremleblanc/ruby-druid"
+  spec.license = "MIT"
+
+  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+  spec.bindir = "exe"
+  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+
+  spec.add_dependency "activesupport", '~> 5.0'
+  spec.add_dependency "ruby-kafka", '~> 0.3'
+  spec.add_dependency "zk", '~> 1.9'
+
+  spec.add_development_dependency "bundler", '~> 1.7'
+  spec.add_development_dependency "rake", '~> 10.0'
+end
data/lib/druid/README.md
ADDED
@@ -0,0 +1,20 @@
+# Druid
+This module contains all logic associated with Druid.
+
+## Node
+The `Node` classes represent Druid nodes and manage connections with them. They
+also provide the methods that are exposed natively by the Druid REST API.
+
+## Query
+The query module provides a way for the `Druid::Client` to inherit the methods
+from the `Node` classes. Additionally, the `Query` module classes provide some
+additional methods not found natively in the Druid REST API.
+
+## Writer
+The `Writer` classes utilize the Tranquility Kafka API to communicate with Druid
+nodes and allow writing.
+
+## Errors
+**ClientError:** Indicates a failure within the Ruby-Druid adapter.
+**ConnectionError:** Indicates a failed request to Druid.
+**QueryError:** Indicates a malformed query.
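
Note: for orientation, a minimal usage sketch assembled only from the classes in this diff; the datasource name, payload shape, and query options are illustrative, not part of the package:

    require 'druiddb'

    # The client wires together ZK discovery, the node classes, and the writer.
    client = Druid::Client.new(zookeeper: 'localhost:2181')

    # Write through the Kafka-backed writer (Writer#write_point).
    client.writer.write_point('events', { timestamp: Time.now.utc.iso8601, value: 1 }.to_json)

    # Query through the broker (Broker#query posts to /druid/v2 and parses the JSON response).
    client.broker.query(
      queryType: 'timeseries',
      dataSource: 'events',
      granularity: 'minute',
      intervals: ['2017-07-01T00:00:00Z/2017-07-02T00:00:00Z'],
      aggregations: [{ type: 'longSum', name: 'value', fieldName: 'value' }]
    )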
data/lib/druid/client.rb
ADDED
@@ -0,0 +1,22 @@
+module Druid
+  class Client
+    include Druid::Queries::Core
+    include Druid::Queries::Task
+
+    attr_reader :broker,
+                :config,
+                :coordinator,
+                :overlord,
+                :writer,
+                :zk
+
+    def initialize(options = {})
+      @config = Druid::Configuration.new(options)
+      @zk = Druid::ZK.new(config)
+      @broker = Druid::Node::Broker.new(config, zk)
+      @coordinator = Druid::Node::Coordinator.new(config, zk)
+      @overlord = Druid::Node::Overlord.new(config, zk)
+      @writer = Druid::Writer.new(config, zk)
+    end
+  end
+end
data/lib/druid/configuration.rb
ADDED
@@ -0,0 +1,51 @@
+module Druid
+  class Configuration
+    DISCOVERY_PATH = '/druid/discovery'.freeze
+    INDEX_SERVICE = 'druid/overlord'.freeze
+    KAFKA_BROKER_PATH = '/brokers/ids'.freeze
+    LOG_LEVEL = :error
+    ROLLUP_GRANULARITY = :minute
+    STRONG_DELETE = false # Not recommended to be true for production.
+    TUNING_GRANULARITY = :day
+    TUNING_WINDOW = 'PT1H'.freeze
+    WAIT_TIME = 20 # Seconds
+    ZOOKEEPER = 'localhost:2181'.freeze
+
+    attr_reader :discovery_path,
+                :index_service,
+                :kafka_broker_path,
+                :log_level,
+                :rollup_granularity,
+                :strong_delete,
+                :tuning_granularity,
+                :tuning_window,
+                :wait_time,
+                :zookeeper
+
+
+    def initialize(opts = {})
+      @discovery_path = opts[:discovery_path] || DISCOVERY_PATH
+      @index_service = opts[:index_service] || INDEX_SERVICE
+      @kafka_broker_path = opts[:kafka_broker_path] || KAFKA_BROKER_PATH
+      @log_level = opts[:log_level] || LOG_LEVEL
+      @rollup_granularity = rollup_granularity_string(opts[:rollup_granularity])
+      @strong_delete = opts[:strong_delete] || STRONG_DELETE
+      @tuning_granularity = tuning_granularity_string(opts[:tuning_granularity])
+      @tuning_window = opts[:tuning_window] || TUNING_WINDOW
+      @wait_time = opts[:wait_time] || WAIT_TIME
+      @zookeeper = opts[:zookeeper] || ZOOKEEPER
+    end
+
+    private
+
+    def rollup_granularity_string(input)
+      output_string = input || ROLLUP_GRANULARITY
+      output_string.to_s.upcase.freeze
+    end
+
+    def tuning_granularity_string(input)
+      output_string = input || TUNING_GRANULARITY
+      output_string.to_s.upcase.freeze
+    end
+  end
+end
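
Note: each constant above is only a fallback; Configuration accepts per-client overrides (a sketch with illustrative values):

    config = Druid::Configuration.new(
      zookeeper: 'zk1:2181,zk2:2181', # connection string handed to ZK.new
      rollup_granularity: :second,    # upcased to 'SECOND' by rollup_granularity_string
      tuning_window: 'PT2H',
      wait_time: 30
    )
    config.rollup_granularity # => "SECOND"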
data/lib/druid/connection.rb
ADDED
@@ -0,0 +1,70 @@
+# Based on: http://danknox.github.io/2013/01/27/using-rubys-native-nethttp-library/
+require 'net/http'
+
+module Druid
+  class Connection
+    CONTENT_TYPE = 'application/json'.freeze
+    VERB_MAP = {
+      :get => ::Net::HTTP::Get,
+      :post => ::Net::HTTP::Post,
+      :put => ::Net::HTTP::Put,
+      :delete => ::Net::HTTP::Delete
+    }
+
+    attr_reader :http
+
+    def initialize(endpoint)
+      if endpoint.is_a? String
+        uri = URI.parse(endpoint)
+        host, port = uri.host, uri.port
+      else
+        host, port = endpoint.values_at(:host, :port)
+      end
+
+      @http = ::Net::HTTP.new(host, port)
+    end
+
+    def get(path, params = {})
+      request :get, path, params
+    end
+
+    def post(path, params = {})
+      request :post, path, params
+    end
+
+    def put(path, params = {})
+      request :put, path, params
+    end
+
+    def delete(path, params = {})
+      request :delete, path, params
+    end
+
+    private
+
+    def encode_path_params(path, params)
+      encoded = URI.encode_www_form(params)
+      [path, encoded].join("?")
+    end
+
+    def request(method, path, params)
+      case method
+      when :get
+        full_path = encode_path_params(path, params)
+        request = VERB_MAP[method].new(full_path)
+      else
+        request = VERB_MAP[method].new(path)
+        request.body = params.to_json
+      end
+
+      request.content_type = CONTENT_TYPE
+      begin
+        response = http.request(request)
+      rescue Timeout::Error, *Druid::NET_HTTP_EXCEPTIONS => e
+        raise ConnectionError, e.message
+      end
+
+      response
+    end
+  end
+end
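
Note: Connection is a thin Net::HTTP wrapper accepting either a host/port hash or a URL string; a small sketch (host, port, and path are illustrative):

    conn = Druid::Connection.new(host: 'localhost', port: 8082)
    response = conn.get('/status')                # raw Net::HTTPResponse
    puts response.body if response.code.to_i == 200

    conn = Druid::Connection.new('http://localhost:8082') # string form is parsed with URI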
data/lib/druid/errors.rb
ADDED
@@ -0,0 +1,22 @@
+module Druid
+  class Error < StandardError; end
+  class ClientError < Error; end
+  class ConnectionError < Error; end
+  class QueryError < Error; end
+  class ValidationError < Error; end
+
+  # Adopted from: https://github.com/lostisland/faraday/blob/master/lib/faraday/adapter/net_http.rb
+  NET_HTTP_EXCEPTIONS = [
+    EOFError,
+    Errno::ECONNABORTED,
+    Errno::ECONNREFUSED,
+    Errno::ECONNRESET,
+    Errno::EHOSTUNREACH,
+    Errno::EINVAL,
+    Errno::ENETUNREACH,
+    Net::HTTPBadResponse,
+    Net::HTTPHeaderSyntaxError,
+    Net::ProtocolError,
+    SocketError
+  ]
+end
data/lib/druid/node/broker.rb
ADDED
@@ -0,0 +1,35 @@
+module Druid
+  module Node
+    class Broker
+      QUERY_PATH = '/druid/v2'.freeze
+
+      attr_reader :config, :zk
+      def initialize(config, zk)
+        @config = config
+        @zk = zk
+      end
+
+      #TODO: Would caching connections be beneficial?
+      def connection
+        broker = zk.registry["#{config.discovery_path}/druid:broker"].first
+        raise Druid::ConnectionError, 'no druid brokers available' if broker.nil?
+        zk.registry["#{config.discovery_path}/druid:broker"].rotate! # round-robin load balancing
+        Druid::Connection.new(host: broker[:host], port: broker[:port])
+      end
+
+      def query(query_object)
+        begin
+          response = connection.post(QUERY_PATH, query_object)
+        rescue Druid::ConnectionError => e
+          # TODO: This sucks, make it better
+          (zk.registry["#{config.discovery_path}/druid:broker"].size - 1).times do
+            response = connection.post(QUERY_PATH, query_object)
+            break if response.code.to_i == 200
+          end
+        end
+        raise QueryError unless response.code.to_i == 200
+        JSON.parse(response.body)
+      end
+    end
+  end
+end
data/lib/druid/node/coordinator.rb
ADDED
@@ -0,0 +1,117 @@
+module Druid
+  module Node
+    class Coordinator
+      DATASOURCES_PATH = '/druid/coordinator/v1/datasources/'.freeze
+
+      attr_reader :config, :zk
+      def initialize(config, zk)
+        @config = config
+        @zk = zk
+      end
+
+      # TODO: DRY; copy/paste from broker
+      def connection
+        coordinator = zk.registry["#{config.discovery_path}/druid:coordinator"].first
+        raise Druid::ConnectionError, 'no druid coordinators available' if coordinator.nil?
+        zk.registry["#{config.discovery_path}/druid:coordinator"].rotate! # round-robin load balancing
+        Druid::Connection.new(host: coordinator[:host], port: coordinator[:port])
+      end
+
+      def datasource_info(datasource_name)
+        response = connection.get(DATASOURCES_PATH + datasource_name.to_s, full: true)
+        raise ConnectionError, 'Unable to retrieve datasource information.' unless response.code.to_i == 200
+        JSON.parse(response.body)
+      end
+
+      def disable_datasource(datasource_name)
+        # response = connection.delete(DATASOURCES_PATH + datasource_name.to_s)
+        # raise ConnectionError, 'Unable to disable datasource' unless response.code.to_i == 200
+        # return true if response.code.to_i == 200
+
+        # This is a workaround for https://github.com/druid-io/druid/issues/3154
+        disable_segments(datasource_name)
+        bounded_wait_for_segments_disable(datasource_name)
+        true
+      end
+
+      # TODO: This should either be private or moved to datasource
+      def datasource_enabled?(datasource_name)
+        list_datasources.include? datasource_name
+      end
+
+      # TODO: This should either be private or moved to datasource
+      def datasource_has_segments?(datasource_name)
+        list_segments(datasource_name).any?
+      end
+
+      def disable_segment(datasource_name, segment)
+        response = connection.delete(DATASOURCES_PATH + datasource_name + '/segments/' + segment)
+        raise ConnectionError, "Unable to disable #{segment}" unless response.code.to_i == 200
+        true
+      end
+
+      # TODO: This should either be private or moved to datasource
+      def disable_segments(datasource_name)
+        segments = list_segments(datasource_name)
+        segments.each{ |segment| disable_segment(datasource_name, segment) }
+      end
+
+      def issue_kill_task(datasource_name, interval)
+        response = connection.delete(DATASOURCES_PATH + datasource_name + '/intervals/' + interval)
+        raise ConnectionError, 'Unable to issue kill task.' unless response.code.to_i == 200
+        true
+      end
+
+      def list_datasources(url_params = {})
+        response = connection.get(DATASOURCES_PATH, url_params)
+        JSON.parse(response.body) if response.code.to_i == 200
+      end
+
+      def list_segments(datasource_name)
+        response = connection.get(DATASOURCES_PATH + datasource_name + '/segments', full: true)
+        case response.code.to_i
+        when 200
+          JSON.parse(response.body).map{ |segment| segment['identifier'] }
+        when 204
+          []
+        else
+          raise ConnectionError, "Unable to list segments for #{datasource_name}"
+        end
+      end
+
+      private
+
+      def bounded_wait_for_disable(datasource_name)
+        condition = datasource_enabled?(datasource_name)
+        attempts = 0
+        max = 10
+
+        while(condition) do
+          attempts += 1
+          sleep 1
+          condition = datasource_enabled?(datasource_name)
+          break if attempts >= max
+        end
+
+        raise ClientError, 'Datasource should be disabled, but is still enabled.' if condition
+        true
+      end
+
+      def bounded_wait_for_segments_disable(datasource_name)
+        condition = datasource_has_segments?(datasource_name)
+        attempts = 0
+        max = 60
+
+        while(condition) do
+          attempts += 1
+          sleep 1
+          condition = datasource_has_segments?(datasource_name)
+          break if attempts >= max
+        end
+
+        raise ClientError, 'Segments should be disabled, but are still enabled.' if condition
+        true
+      end
+    end
+  end
+end
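
Note: the Coordinator methods compose into the issue-3154 workaround above; a sketch of the disable-then-kill flow. The datasource and interval are illustrative, and the underscore-separated interval in the URL path is an assumption based on Druid's coordinator API conventions:

    coordinator = client.coordinator
    coordinator.list_datasources                 # => ["events", ...]
    coordinator.disable_datasource('events')     # disables each segment, then waits
    coordinator.issue_kill_task('events', '2017-01-01_2017-07-01')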
data/lib/druid/node/overlord.rb
ADDED
@@ -0,0 +1,60 @@
+module Druid
+  module Node
+    class Overlord
+      INDEXER_PATH = '/druid/indexer/v1/'.freeze
+      RUNNING_TASKS_PATH = (INDEXER_PATH + 'runningTasks').freeze
+      TASK_PATH = INDEXER_PATH + 'task/'
+
+      attr_reader :config, :zk
+      def initialize(config, zk)
+        @config = config
+        @zk = zk
+      end
+
+      #TODO: DRY: copy/paste
+      def connection
+        overlord = zk.registry["#{config.discovery_path}/druid:overlord"].first
+        raise Druid::ConnectionError, 'no druid overlords available' if overlord.nil?
+        zk.registry["#{config.discovery_path}/druid:overlord"].rotate! # round-robin load balancing
+        Druid::Connection.new(host: overlord[:host], port: overlord[:port])
+      end
+
+      def running_tasks(datasource_name = nil)
+        response = connection.get(RUNNING_TASKS_PATH)
+        raise ConnectionError, 'Could not retrieve running tasks' unless response.code.to_i == 200
+        tasks = JSON.parse(response.body).map{|task| task['id']}
+        tasks.select!{ |task| task.include? datasource_name } if datasource_name
+        tasks ? tasks : []
+      end
+
+      def shutdown_task(task)
+        response = connection.post(TASK_PATH + task + '/shutdown')
+        raise ConnectionError, 'Unable to shutdown task' unless response.code.to_i == 200
+        bounded_wait_for_shutdown(task)
+      end
+
+      def shutdown_tasks(datasource_name = nil)
+        tasks = running_tasks(datasource_name)
+        tasks.each{|task| shutdown_task(task)}
+      end
+
+      private
+
+      def bounded_wait_for_shutdown(task)
+        condition = !(running_tasks.include? task)
+        attempts = 0
+        max = 10
+
+        until(condition) do
+          attempts += 1
+          sleep 1
+          condition = !(running_tasks.include? task)
+          break if attempts >= max
+        end
+
+        raise ClientError, 'Task did not shutdown.' unless condition
+        true
+      end
+    end
+  end
+end
data/lib/druid/query.rb
ADDED
@@ -0,0 +1,182 @@
+module Druid
+  class Query
+    attr_reader :aggregations,
+                :broker,
+                :dimensions,
+                :end_interval,
+                :fill_value,
+                :granularity,
+                :query_opts,
+                :query_type,
+                :range,
+                :result_key,
+                :start_interval
+
+    def initialize(opts)
+      @aggregations = opts[:aggregations].map{|agg| agg[:name]}
+      @broker = opts[:broker]
+      @dimensions = opts[:dimensions]
+      @fill_value = opts[:fill_value]
+      @granularity = opts[:granularity]
+      @range = parse_range(opts[:intervals])
+      @query_type = opts[:queryType]
+      @end_interval = calculate_end_interval
+      @start_interval = calculate_start_interval
+      @query_opts = opts_for_query(opts)
+    end
+
+    def execute
+      result = broker.query(query_opts)
+      fill_query_results(result)
+    end
+
+    private
+
+    # TODO: Can this be made smarter? Prefer to avoid case statements.
+    # Cases found here: http://druid.io/docs/latest/querying/granularities.html
+    def advance_interval(time)
+      case granularity
+      when 'second'
+        time.advance(seconds: 1)
+      when 'minute'
+        time.advance(minutes: 1)
+      when 'fifteen_minute'
+        time.advance(minutes: 15)
+      when 'thirty_minute'
+        time.advance(minutes: 30)
+      when 'hour'
+        time.advance(hours: 1)
+      when 'day'
+        time.advance(days: 1)
+      when 'week'
+        time.advance(weeks: 1)
+      when 'month'
+        time.advance(months: 1)
+      when 'quarter'
+        time.advance(months: 3)
+      when 'year'
+        time.advance(years: 1)
+      else
+        raise Druid::QueryError, 'Unsupported granularity'
+      end
+    end
+
+    def calculate_end_interval
+      iso8601_duration_end_interval(range)
+    end
+
+    def calculate_start_interval
+      time = iso8601_duration_start_interval(range)
+      start_of_interval(time)
+    end
+
+    def fill_empty_intervals(points, opts = {})
+      interval = start_interval
+      result = []
+
+      while interval <= end_interval do
+        # TODO:
+        # This will search the points every time, could be more performant if
+        # we track the 'current point' in the points and only compare the
+        # current point's timestamp
+        point = find_or_create_point(interval, points)
+        aggregations.each do |aggregation|
+          point[result_key][aggregation] = fill_value if point[result_key][aggregation].blank?
+          point[result_key].merge!(opts)
+        end
+        result << point
+        interval = advance_interval(interval)
+      end
+
+      result
+    end
+
+    # NOTE:
+    # This responsibility really lies in Druid, but until the feature works
+    # reliably in Druid, this serves the purpose.
+    # https://github.com/druid-io/druid/issues/2106
+    def fill_query_results(query_result)
+      return query_result unless query_result.present? && fill_value.present?
+      parse_result_key(query_result.first)
+
+      #TODO: handle multi-dimensional group by
+      if group_by?
+        result = []
+        dimension_key = dimensions.first
+        groups = query_result.group_by{ |point| point[result_key][dimension_key] }
+        groups.each do |dimension_value, dimension_points|
+          result += fill_empty_intervals(dimension_points, { dimension_key => dimension_value })
+        end
+        result
+      else
+        fill_empty_intervals(query_result)
+      end
+    end
+
+    def find_or_create_point(interval, points)
+      point = points.find{ |point| point['timestamp'].to_s.to_time == interval.to_time }
+      point.present? ? point : { 'timestamp' => interval.iso8601(3), result_key => {} }
+    end
+
+    def group_by?
+      query_type == 'groupBy'
+    end
+
+    def iso8601_duration_start_interval(duration)
+      duration.split('/').first.to_time.utc
+    end
+
+    def iso8601_duration_end_interval(duration)
+      duration.split('/').last.to_time.utc
+    end
+
+    def opts_for_query(opts)
+      opts.except(:fill_value, :broker)
+    end
+
+    def parse_range(range)
+      range.is_a?(Array) ? range.first : range
+    end
+
+    def parse_result_key(point)
+      @result_key = point['event'].present? ? 'event' : 'result'
+    end
+
+    # TODO: Can this be made smarter? Prefer to avoid case statements.
+    # Cases found here: http://druid.io/docs/latest/querying/granularities.html
+    def start_of_interval(time)
+      case granularity
+      when 'second'
+        time.change(usec: 0)
+      when 'minute'
+        time.beginning_of_minute
+      when 'fifteen_minute'
+        first_fifteen = [45, 30, 15, 0].detect{ |m| m <= time.min }
+        time.change(min: first_fifteen)
+      when 'thirty_minute'
+        first_thirty = [30, 0].detect{ |m| m <= time.min }
+        time.change(min: first_thirty)
+      when 'hour'
+        time.beginning_of_hour
+      when 'day'
+        time.beginning_of_day
+      when 'week'
+        time.beginning_of_week
+      when 'month'
+        time.beginning_of_month
+      when 'quarter'
+        time.beginning_of_quarter
+      when 'year'
+        time.beginning_of_year
+      else
+        time
+      end
+    end
+
+    class << self
+      def create(opts)
+        new(opts).execute
+      end
+    end
+  end
+end
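
Note: Query.create is the entry point for the gap-filling above; a sketch with illustrative option values. :broker and :fill_value are stripped by opts_for_query before the rest is posted to the broker as a native Druid query:

    Druid::Query.create(
      broker: client.broker,
      fill_value: 0,             # empty intervals get this value per aggregation
      queryType: 'timeseries',
      dataSource: 'events',
      granularity: 'minute',
      intervals: ['2017-07-01T00:00:00Z/2017-07-01T01:00:00Z'],
      aggregations: [{ name: 'value', type: 'longSum', fieldName: 'value' }]
    )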
data/lib/druid/writer.rb
ADDED
@@ -0,0 +1,47 @@
+#TODO: Seems to be a delay after shutting down Kafka and ZK updating
+module Druid
+  class Writer
+    attr_reader :config, :producer, :zk
+    def initialize(config, zk)
+      @config = config
+      @zk = zk
+      init_producer
+      zk.register_listener(self, :handle_kafka_state_change)
+    end
+
+    def write_point(datasource, datapoint)
+      raise Druid::ConnectionError, 'no kafka brokers available' if producer.nil?
+      producer.produce(datapoint, topic: datasource)
+    end
+
+    private
+
+    def broker_list
+      zk.registry["/brokers/ids"].map{|instance| "#{instance[:host]}:#{instance[:port]}" }.join(',')
+    end
+
+    def handle_kafka_state_change(service)
+      if service == config.kafka_broker_path
+        producer.shutdown
+        init_producer
+      end
+    end
+
+    def init_producer
+      producer_options = {
+        seed_brokers: broker_list,
+        client_id: "ruby-druid"
+      }
+
+      if broker_list.present?
+        kafka = Kafka.new(producer_options)
+        producer = kafka.async_producer(delivery_threshold: 100, delivery_interval: 10)
+        producer.deliver_messages
+      else
+        producer = nil
+      end
+
+      @producer = producer
+    end
+  end
+end
data/lib/druid/zk.rb
ADDED
@@ -0,0 +1,63 @@
+module Druid
+  class ZK
+    attr_accessor :registry
+    attr_reader :client, :config, :listeners
+
+    #TODO: Test and handle ZK partitions
+    def initialize(config)
+      @client = ::ZK.new(config.zookeeper)
+      @config = config
+      @listeners = []
+      @registry = {}
+      register
+    end
+
+    def register_listener(object, method)
+      listeners << ->(*args) { object.send(method, *args) }
+    end
+
+    private
+
+    def announce(service)
+      # puts "announcing #{service}"
+      listeners.each { |listener| listener.call(service) }
+    end
+
+    def register
+      register_service("#{config.discovery_path}/druid:broker")
+      register_service("#{config.discovery_path}/druid:coordinator")
+      register_service("#{config.discovery_path}/druid:overlord")
+      register_service("#{config.kafka_broker_path}")
+    end
+
+    def register_service(service)
+      # puts "registering #{service}"
+      #TODO: Thread safety, lock this registry key
+      subscribe_to_service(service)
+      renew_service_instances(service)
+    end
+
+    def renew_service_instances(service)
+      # puts "activating registered subscriptions on #{service}"
+      instances = client.children(service, watch: true)
+
+      # puts "emptying #{service} from registry"
+      registry[service] = []
+      instances.each do |instance|
+        data = JSON.parse(client.get("#{service}/#{instance}").first)
+        host = data['address'] || data['host']
+        port = data['port']
+        # puts "adding #{host}:#{port} to registry for #{service}"
+        registry[service] << { host: host, port: port }
+      end
+    end
+
+    def subscribe_to_service(service)
+      subscription = client.register(service) do |event|
+        # puts "watched event for #{service} detected"
+        renew_service_instances(event.path)
+        announce(event.path)
+      end
+    end
+  end
+end
data/lib/druiddb.rb
ADDED
@@ -0,0 +1,21 @@
+require "active_support/all"
+require "ruby-kafka"
+require "json"
+require "zk"
+
+require "druid/configuration"
+require "druid/connection"
+require "druid/errors"
+require "druid/query"
+require "druid/version"
+require "druid/zk"
+
+require "druid/node/broker"
+require "druid/node/coordinator"
+require "druid/node/overlord"
+
+require "druid/queries/core"
+require "druid/queries/task"
+
+require "druid/writer"
+require "druid/client"
metadata
ADDED
@@ -0,0 +1,138 @@
+--- !ruby/object:Gem::Specification
+name: druiddb
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+platform: ruby
+authors:
+- Andre LeBlanc
+autorequire:
+bindir: exe
+cert_chain: []
+date: 2017-07-07 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: activesupport
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.0'
+- !ruby/object:Gem::Dependency
+  name: ruby-kafka
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.3'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.3'
+- !ruby/object:Gem::Dependency
+  name: zk
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.9'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.9'
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.7'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.7'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+description: Ruby adapter for Druid that allows reads and writes using the Tranquility
+  Kafka API.
+email:
+- andre.leblanc88@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- bin/console
+- bin/setup
+- druiddb.gemspec
+- lib/druid/README.md
+- lib/druid/client.rb
+- lib/druid/configuration.rb
+- lib/druid/connection.rb
+- lib/druid/errors.rb
+- lib/druid/node/broker.rb
+- lib/druid/node/coordinator.rb
+- lib/druid/node/overlord.rb
+- lib/druid/queries/core.rb
+- lib/druid/queries/task.rb
+- lib/druid/query.rb
+- lib/druid/version.rb
+- lib/druid/writer.rb
+- lib/druid/zk.rb
+- lib/druiddb.rb
+homepage: https://github.com/andremleblanc/ruby-druid
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.6.12
+signing_key:
+specification_version: 4
+summary: Ruby adapter for Druid.
+test_files: []