druiddb 1.0.0

checksums.yaml.gz ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 39e6d627318a91e94886eb34b4d05b349d79153f
+   data.tar.gz: b6bc938cd4175a50e2aaa630123a03faf5119660
+ SHA512:
+   metadata.gz: 202fd72edd5740aa09727d7eec183cc79ddaf2cfbd9bc529e7d908252039c78354c89ba7a58bb19f0fc550972f866d39abe9f8291fbc53bb956ec5260137a793
+   data.tar.gz: fb94c7c1e92b3a653d76851c420f8368b27d6a6dfd63402cf7bb8cca6d5e2247d028dc9fde074b0b5c943911d99e8c93bc330616fbfb487ea3691a18238fe552
data/.gitignore ADDED
@@ -0,0 +1,14 @@
+ /.bundle/
+ /.yardoc
+ /Gemfile.lock
+ /_yardoc/
+ /coverage/
+ /doc/
+ /pkg/
+ /spec/reports/
+ /tmp/
+ /example
+ zookeeper.out
+ jruby-druid.log
+ .ruby-version
+ *.gem
data/Gemfile ADDED
@@ -0,0 +1,2 @@
+ source 'https://rubygems.org'
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
+ The MIT License (MIT)
+
+ Copyright (c) 2016 Andre LeBlanc
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1 @@
+ # ruby-druid
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ require "bundler/gem_tasks"
+ require "rspec/core/rake_task"
+
+ RSpec::Core::RakeTask.new(:spec)
+
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,7 @@
+ #!/usr/bin/env ruby
+
+ require "bundler/setup"
+ require "irb"
+ require "druiddb"
+
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
+ #!/usr/bin/env bash
+ set -euo pipefail
+ IFS=$'\n\t'
+ set -vx
+
+ bundle install
+
+ # Do any other automated setup that you need to do here
data/druiddb.gemspec ADDED
@@ -0,0 +1,28 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'druid/version'
+
+ Gem::Specification.new do |spec|
+   spec.name = "druiddb"
+   spec.version = RubyDruid::VERSION
+   spec.authors = ["Andre LeBlanc"]
+   spec.email = ["andre.leblanc88@gmail.com"]
+
+   spec.summary = 'Ruby adapter for Druid.'
+   spec.description = 'Ruby adapter for Druid that allows reads and writes using the Tranquility Kafka API.'
+   spec.homepage = "https://github.com/andremleblanc/ruby-druid"
+   spec.license = "MIT"
+
+   spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+   spec.bindir = "exe"
+   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+   spec.require_paths = ["lib"]
+
+   spec.add_dependency "activesupport", '~> 5.0'
+   spec.add_dependency "ruby-kafka", '~> 0.3'
+   spec.add_dependency "zk", '~> 1.9'
+
+   spec.add_development_dependency "bundler", '~> 1.7'
+   spec.add_development_dependency "rake", '~> 10.0'
+ end
data/lib/druid/README.md ADDED
@@ -0,0 +1,20 @@
+ # Druid
+ This module contains all logic associated with Druid.
+
+ ## Node
+ The `Node` classes represent Druid nodes and manage connections to them. They
+ also provide the methods that are exposed natively by the Druid REST API.
+
+ ## Query
+ The query module provides a way for the `Druid::Client` to inherit the methods
+ from the `Node` classes. Additionally, the `Query` module classes provide some
+ methods not found natively in the Druid REST API.
+
+ ## Writer
+ The `Writer` classes use the Tranquility Kafka API to communicate with Druid
+ nodes and allow writes.
+
+ ## Errors
+ **ClientError:** Indicates a failure within the ruby-druid adapter.
+ **ConnectionError:** Indicates a failed request to Druid.
+ **QueryError:** Indicates a malformed query.
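
A minimal end-to-end sketch of how these pieces fit together (the ZooKeeper address, datasource name, field values, and interval below are illustrative, not part of the gem):

```ruby
require 'druiddb'

# The client wires up ZooKeeper discovery, the node classes, and the Kafka-backed writer.
client = Druid::Client.new(zookeeper: 'localhost:2181')

# Writes are delegated to Druid::Writer, which produces a message to the Kafka
# topic named after the datasource; the payload is sent as-is, so JSON-encode it.
client.write_point('pageviews', { timestamp: Time.now.utc.iso8601, views: 1 }.to_json)

# Reads go through Druid::Query and the Broker node. fill_value is handled
# client-side: empty granularity buckets are filled in before results are returned.
result = client.query(
  queryType:    'timeseries',
  dataSource:   'pageviews',
  granularity:  'hour',
  intervals:    ['2017-01-01T00:00:00Z/2017-01-02T00:00:00Z'],
  aggregations: [{ type: 'longSum', name: 'views', fieldName: 'views' }],
  fill_value:   0
)
```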
data/lib/druid/client.rb ADDED
@@ -0,0 +1,22 @@
+ module Druid
+   class Client
+     include Druid::Queries::Core
+     include Druid::Queries::Task
+
+     attr_reader :broker,
+                 :config,
+                 :coordinator,
+                 :overlord,
+                 :writer,
+                 :zk
+
+     def initialize(options = {})
+       @config = Druid::Configuration.new(options)
+       @zk = Druid::ZK.new(config)
+       @broker = Druid::Node::Broker.new(config, zk)
+       @coordinator = Druid::Node::Coordinator.new(config, zk)
+       @overlord = Druid::Node::Overlord.new(config, zk)
+       @writer = Druid::Writer.new(config, zk)
+     end
+   end
+ end
data/lib/druid/configuration.rb ADDED
@@ -0,0 +1,51 @@
+ module Druid
+   class Configuration
+     DISCOVERY_PATH = '/druid/discovery'.freeze
+     INDEX_SERVICE = 'druid/overlord'.freeze
+     KAFKA_BROKER_PATH = '/brokers/ids'.freeze
+     LOG_LEVEL = :error
+     ROLLUP_GRANULARITY = :minute
+     STRONG_DELETE = false # Not recommended to be true in production.
+     TUNING_GRANULARITY = :day
+     TUNING_WINDOW = 'PT1H'.freeze
+     WAIT_TIME = 20 # Seconds
+     ZOOKEEPER = 'localhost:2181'.freeze
+
+     attr_reader :discovery_path,
+                 :index_service,
+                 :kafka_broker_path,
+                 :log_level,
+                 :rollup_granularity,
+                 :strong_delete,
+                 :tuning_granularity,
+                 :tuning_window,
+                 :wait_time,
+                 :zookeeper
+
+
+     def initialize(opts = {})
+       @discovery_path = opts[:discovery_path] || DISCOVERY_PATH
+       @index_service = opts[:index_service] || INDEX_SERVICE
+       @kafka_broker_path = opts[:kafka_broker_path] || KAFKA_BROKER_PATH
+       @log_level = opts[:log_level] || LOG_LEVEL
+       @rollup_granularity = rollup_granularity_string(opts[:rollup_granularity])
+       @strong_delete = opts[:strong_delete] || STRONG_DELETE
+       @tuning_granularity = tuning_granularity_string(opts[:tuning_granularity])
+       @tuning_window = opts[:tuning_window] || TUNING_WINDOW
+       @wait_time = opts[:wait_time] || WAIT_TIME
+       @zookeeper = opts[:zookeeper] || ZOOKEEPER
+     end
+
+     private
+
+     def rollup_granularity_string(input)
+       output_string = input || ROLLUP_GRANULARITY
+       output_string.to_s.upcase.freeze
+     end
+
+     def tuning_granularity_string(input)
+       output_string = input || TUNING_GRANULARITY
+       output_string.to_s.upcase.freeze
+     end
+   end
+ end
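
For illustration, here is how a few of these defaults might be overridden when constructing a client (the ZooKeeper hosts are placeholders and a reachable ZooKeeper ensemble is assumed; anything not passed keeps the default above):

```ruby
client = Druid::Client.new(
  zookeeper:          'zk1.example.com:2181,zk2.example.com:2181',
  rollup_granularity: :second,
  tuning_window:      'PT30M',
  wait_time:          30
)

client.config.rollup_granularity # => "SECOND" (upcased, frozen)
client.config.tuning_window      # => "PT30M"
client.config.discovery_path     # => "/druid/discovery" (default)
```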
data/lib/druid/connection.rb ADDED
@@ -0,0 +1,70 @@
+ # Based on: http://danknox.github.io/2013/01/27/using-rubys-native-nethttp-library/
+ require 'net/http'
+
+ module Druid
+   class Connection
+     CONTENT_TYPE = 'application/json'.freeze
+     VERB_MAP = {
+       :get => ::Net::HTTP::Get,
+       :post => ::Net::HTTP::Post,
+       :put => ::Net::HTTP::Put,
+       :delete => ::Net::HTTP::Delete
+     }
+
+     attr_reader :http
+
+     def initialize(endpoint)
+       if endpoint.is_a? String
+         uri = URI.parse(endpoint)
+         host, port = uri.host, uri.port
+       else
+         host, port = endpoint.values_at(:host, :port)
+       end
+
+       @http = ::Net::HTTP.new(host, port)
+     end
+
+     def get(path, params = {})
+       request :get, path, params
+     end
+
+     def post(path, params = {})
+       request :post, path, params
+     end
+
+     def put(path, params = {})
+       request :put, path, params
+     end
+
+     def delete(path, params = {})
+       request :delete, path, params
+     end
+
+     private
+
+     def encode_path_params(path, params)
+       encoded = URI.encode_www_form(params)
+       [path, encoded].join("?")
+     end
+
+     def request(method, path, params)
+       case method
+       when :get
+         full_path = encode_path_params(path, params)
+         request = VERB_MAP[method].new(full_path)
+       else
+         request = VERB_MAP[method].new(path)
+         request.body = params.to_json
+       end
+
+       request.content_type = CONTENT_TYPE
+       begin
+         response = http.request(request)
+       rescue Timeout::Error, *Druid::NET_HTTP_EXCEPTIONS => e
+         raise ConnectionError, e.message
+       end
+
+       response
+     end
+   end
+ end
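
`Druid::Connection` can also be used on its own; a small sketch, assuming the gem is loaded and a Druid node is listening on the given host and port (both placeholders):

```ruby
conn = Druid::Connection.new(host: 'localhost', port: 8082)

# GET requests encode the params into the query string.
res = conn.get('/druid/v2/datasources', full: true)
puts JSON.parse(res.body) if res.code.to_i == 200

# Every other verb sends the params as a JSON body.
res = conn.post('/druid/v2', queryType: 'timeBoundary', dataSource: 'pageviews')
```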
data/lib/druid/errors.rb ADDED
@@ -0,0 +1,22 @@
+ module Druid
+   class Error < StandardError; end
+   class ClientError < Error; end
+   class ConnectionError < Error; end
+   class QueryError < Error; end
+   class ValidationError < Error; end
+
+   # Adapted from: https://github.com/lostisland/faraday/blob/master/lib/faraday/adapter/net_http.rb
+   NET_HTTP_EXCEPTIONS = [
+     EOFError,
+     Errno::ECONNABORTED,
+     Errno::ECONNREFUSED,
+     Errno::ECONNRESET,
+     Errno::EHOSTUNREACH,
+     Errno::EINVAL,
+     Errno::ENETUNREACH,
+     Net::HTTPBadResponse,
+     Net::HTTPHeaderSyntaxError,
+     Net::ProtocolError,
+     SocketError
+   ]
+ end
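
Because every library error descends from `Druid::Error`, callers can rescue narrowly or broadly. A sketch, reusing the illustrative `client` from the earlier example:

```ruby
begin
  client.query(
    queryType:    'timeseries',
    dataSource:   'pageviews',
    granularity:  'hour',
    intervals:    ['2017-01-01T00:00:00Z/2017-01-02T00:00:00Z'],
    aggregations: [{ type: 'longSum', name: 'views', fieldName: 'views' }]
  )
rescue Druid::QueryError
  warn 'Broker rejected the query (non-200 response)'
rescue Druid::ConnectionError => e
  warn "Druid is unreachable: #{e.message}"
rescue Druid::Error => e
  warn "Other druiddb failure: #{e.message}"
end
```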
data/lib/druid/node/broker.rb ADDED
@@ -0,0 +1,35 @@
+ module Druid
+   module Node
+     class Broker
+       QUERY_PATH = '/druid/v2'.freeze
+
+       attr_reader :config, :zk
+       def initialize(config, zk)
+         @config = config
+         @zk = zk
+       end
+
+       #TODO: Would caching connections be beneficial?
+       def connection
+         broker = zk.registry["#{config.discovery_path}/druid:broker"].first
+         raise Druid::ConnectionError, 'no druid brokers available' if broker.nil?
+         zk.registry["#{config.discovery_path}/druid:broker"].rotate! # round-robin load balancing
+         Druid::Connection.new(host: broker[:host], port: broker[:port])
+       end
+
+       def query(query_object)
+         begin
+           response = connection.post(QUERY_PATH, query_object)
+         rescue Druid::ConnectionError => e
+           # TODO: This sucks, make it better
+           (zk.registry["#{config.discovery_path}/druid:broker"].size - 1).times do
+             response = connection.post(QUERY_PATH, query_object)
+             break if response.code.to_i == 200
+           end
+         end
+         raise QueryError unless response && response.code.to_i == 200
+         JSON.parse(response.body)
+       end
+     end
+   end
+ end
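
Illustrative only (host names invented): because the registry entry is rotated on every call, successive connections round-robin across whatever brokers ZooKeeper has announced.

```ruby
client.zk.registry['/druid/discovery/druid:broker']
# => [{ host: 'broker-1', port: 8082 }, { host: 'broker-2', port: 8082 }]

client.broker.connection # talks to broker-1, then rotates the list
client.broker.connection # talks to broker-2
```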
data/lib/druid/node/coordinator.rb ADDED
@@ -0,0 +1,117 @@
+ module Druid
+   module Node
+     class Coordinator
+       DATASOURCES_PATH = '/druid/coordinator/v1/datasources/'.freeze
+
+       attr_reader :config, :zk
+       def initialize(config, zk)
+         @config = config
+         @zk = zk
+       end
+
+       # TODO: DRY; copy/paste from broker
+       def connection
+         coordinator = zk.registry["#{config.discovery_path}/druid:coordinator"].first
+         raise Druid::ConnectionError, 'no druid coordinators available' if coordinator.nil?
+         zk.registry["#{config.discovery_path}/druid:coordinator"].rotate! # round-robin load balancing
+         Druid::Connection.new(host: coordinator[:host], port: coordinator[:port])
+       end
+
+       def datasource_info(datasource_name)
+         response = connection.get(DATASOURCES_PATH + datasource_name.to_s, full: true)
+         raise ConnectionError, 'Unable to retrieve datasource information.' unless response.code.to_i == 200
+         JSON.parse(response.body)
+       end
+
+       def disable_datasource(datasource_name)
+         # response = connection.delete(DATASOURCES_PATH + datasource_name.to_s)
+         # raise ConnectionError, 'Unable to disable datasource' unless response.code.to_i == 200
+         # return true if response.code.to_i == 200
+
+         # This is a workaround for https://github.com/druid-io/druid/issues/3154
+         disable_segments(datasource_name)
+         bounded_wait_for_segments_disable(datasource_name)
+         true
+       end
+
+       # TODO: This should either be private or moved to datasource
+       def datasource_enabled?(datasource_name)
+         list_datasources.include? datasource_name
+       end
+
+       # TODO: This should either be private or moved to datasource
+       def datasource_has_segments?(datasource_name)
+         list_segments(datasource_name).any?
+       end
+
+       def disable_segment(datasource_name, segment)
+         response = connection.delete(DATASOURCES_PATH + datasource_name + '/segments/' + segment)
+         raise ConnectionError, "Unable to disable #{segment}" unless response.code.to_i == 200
+         true
+       end
+
+       # TODO: This should either be private or moved to datasource
+       def disable_segments(datasource_name)
+         segments = list_segments(datasource_name)
+         segments.each{ |segment| disable_segment(datasource_name, segment) }
+       end
+
+       def issue_kill_task(datasource_name, interval)
+         response = connection.delete(DATASOURCES_PATH + datasource_name + '/intervals/' + interval)
+         raise ConnectionError, 'Unable to issue kill task.' unless response.code.to_i == 200
+         true
+       end
+
+       def list_datasources(url_params = {})
+         response = connection.get(DATASOURCES_PATH, url_params)
+         JSON.parse(response.body) if response.code.to_i == 200
+       end
+
+       def list_segments(datasource_name)
+         response = connection.get(DATASOURCES_PATH + datasource_name + '/segments', full: true)
+         case response.code.to_i
+         when 200
+           JSON.parse(response.body).map{ |segment| segment['identifier'] }
+         when 204
+           []
+         else
+           raise ConnectionError, "Unable to list segments for #{datasource_name}"
+         end
+       end
+
+       private
+
+       def bounded_wait_for_disable(datasource_name)
+         condition = datasource_enabled?(datasource_name)
+         attempts = 0
+         max = 10
+
+         while(condition) do
+           attempts += 1
+           sleep 1
+           condition = datasource_enabled?(datasource_name)
+           break if attempts >= max
+         end
+
+         raise ClientError, 'Datasource should be disabled, but is still enabled.' if condition
+         true
+       end
+
+       def bounded_wait_for_segments_disable(datasource_name)
+         condition = datasource_has_segments?(datasource_name)
+         attempts = 0
+         max = 60
+
+         while(condition) do
+           attempts += 1
+           sleep 1
+           condition = datasource_has_segments?(datasource_name)
+           break if attempts >= max
+         end
+
+         raise ClientError, 'Segments should be disabled, but are still enabled.' if condition
+         true
+       end
+     end
+   end
+ end
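
The coordinator methods are reachable through `client.coordinator`; a sketch of a typical disable-and-kill flow (the datasource name and interval are illustrative):

```ruby
coordinator = client.coordinator

coordinator.list_datasources              # => ["pageviews", ...]
coordinator.datasource_info('pageviews')  # segment and interval metadata as a Hash

# Disables every segment, then waits (up to 60 checks, one per second) for the
# coordinator to report them gone.
coordinator.disable_datasource('pageviews')

# Optionally issue a kill task for the now-disabled interval.
coordinator.issue_kill_task('pageviews', '2017-01-01_2017-01-02')
```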
data/lib/druid/node/overlord.rb ADDED
@@ -0,0 +1,60 @@
+ module Druid
+   module Node
+     class Overlord
+       INDEXER_PATH = '/druid/indexer/v1/'.freeze
+       RUNNING_TASKS_PATH = (INDEXER_PATH + 'runningTasks').freeze
+       TASK_PATH = INDEXER_PATH + 'task/'
+
+       attr_reader :config, :zk
+       def initialize(config, zk)
+         @config = config
+         @zk = zk
+       end
+
+       #TODO: DRY: copy/paste
+       def connection
+         overlord = zk.registry["#{config.discovery_path}/druid:overlord"].first
+         raise Druid::ConnectionError, 'no druid overlords available' if overlord.nil?
+         zk.registry["#{config.discovery_path}/druid:overlord"].rotate! # round-robin load balancing
+         Druid::Connection.new(host: overlord[:host], port: overlord[:port])
+       end
+
+       def running_tasks(datasource_name = nil)
+         response = connection.get(RUNNING_TASKS_PATH)
+         raise ConnectionError, 'Could not retrieve running tasks' unless response.code.to_i == 200
+         tasks = JSON.parse(response.body).map{|task| task['id']}
+         tasks.select!{ |task| task.include? datasource_name } if datasource_name
+         tasks ? tasks : []
+       end
+
+       def shutdown_task(task)
+         response = connection.post(TASK_PATH + task + '/shutdown')
+         raise ConnectionError, 'Unable to shutdown task' unless response.code.to_i == 200
+         bounded_wait_for_shutdown(task)
+       end
+
+       def shutdown_tasks(datasource_name = nil)
+         tasks = running_tasks(datasource_name)
+         tasks.each{|task| shutdown_task(task)}
+       end
+
+       private
+
+       def bounded_wait_for_shutdown(task)
+         condition = !(running_tasks.include? task)
+         attempts = 0
+         max = 10
+
+         until(condition) do
+           attempts += 1
+           sleep 1
+           condition = !(running_tasks.include? task)
+           break if attempts >= max
+         end
+
+         raise ClientError, 'Task did not shutdown.' unless condition
+         true
+       end
+     end
+   end
+ end
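
Task management goes through the overlord, either directly or via the `shutdown_tasks` delegation on the client (the datasource name is illustrative):

```ruby
client.overlord.running_tasks               # => all running task ids
client.overlord.running_tasks('pageviews')  # only task ids mentioning the datasource

# Shut down each running task for the datasource and wait (up to 10 checks each)
# for the overlord to report it gone.
client.shutdown_tasks('pageviews')
```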
data/lib/druid/queries/core.rb ADDED
@@ -0,0 +1,11 @@
+ module Druid
+   module Queries
+     module Core
+       delegate :write_point, to: :writer
+
+       def query(opts)
+         Druid::Query.create(opts.merge(broker: broker))
+       end
+     end
+   end
+ end
data/lib/druid/queries/task.rb ADDED
@@ -0,0 +1,7 @@
+ module Druid
+   module Queries
+     module Task
+       delegate :shutdown_tasks, to: :overlord
+     end
+   end
+ end
data/lib/druid/query.rb ADDED
@@ -0,0 +1,182 @@
+ module Druid
+   class Query
+     attr_reader :aggregations,
+                 :broker,
+                 :dimensions,
+                 :end_interval,
+                 :fill_value,
+                 :granularity,
+                 :query_opts,
+                 :query_type,
+                 :range,
+                 :result_key,
+                 :start_interval
+
+     def initialize(opts)
+       @aggregations = opts[:aggregations].map{|agg| agg[:name]}
+       @broker = opts[:broker]
+       @dimensions = opts[:dimensions]
+       @fill_value = opts[:fill_value]
+       @granularity = opts[:granularity]
+       @range = parse_range(opts[:intervals])
+       @query_type = opts[:queryType]
+       @end_interval = calculate_end_interval
+       @start_interval = calculate_start_interval
+       @query_opts = opts_for_query(opts)
+     end
+
+     def execute
+       result = broker.query(query_opts)
+       fill_query_results(result)
+     end
+
+     private
+
+     # TODO: Can this be made smarter? Prefer to avoid case statements.
+     # Cases found here: http://druid.io/docs/latest/querying/granularities.html
+     def advance_interval(time)
+       case granularity
+       when 'second'
+         time.advance(seconds: 1)
+       when 'minute'
+         time.advance(minutes: 1)
+       when 'fifteen_minute'
+         time.advance(minutes: 15)
+       when 'thirty_minute'
+         time.advance(minutes: 30)
+       when 'hour'
+         time.advance(hours: 1)
+       when 'day'
+         time.advance(days: 1)
+       when 'week'
+         time.advance(weeks: 1)
+       when 'month'
+         time.advance(months: 1)
+       when 'quarter'
+         time.advance(months: 3)
+       when 'year'
+         time.advance(years: 1)
+       else
+         raise Druid::QueryError, 'Unsupported granularity'
+       end
+     end
+
+     def calculate_end_interval
+       iso8601_duration_end_interval(range)
+     end
+
+     def calculate_start_interval
+       time = iso8601_duration_start_interval(range)
+       start_of_interval(time)
+     end
+
+     def fill_empty_intervals(points, opts = {})
+       interval = start_interval
+       result = []
+
+       while interval <= end_interval do
+         # TODO:
+         # This will search the points every time, could be more performant if
+         # we track the 'current point' in the points and only compare the
+         # current point's timestamp
+         point = find_or_create_point(interval, points)
+         aggregations.each do |aggregation|
+           point[result_key][aggregation] = fill_value if point[result_key][aggregation].blank?
+           point[result_key].merge!(opts)
+         end
+         result << point
+         interval = advance_interval(interval)
+       end
+
+       result
+     end
+
+     # NOTE:
+     # This responsibility really lies in Druid, but until the feature works
+     # reliably in Druid, this serves the purpose.
+     # https://github.com/druid-io/druid/issues/2106
+     def fill_query_results(query_result)
+       return query_result unless query_result.present? && fill_value.present?
+       parse_result_key(query_result.first)
+
+       #TODO: handle multi-dimensional group by
+       if group_by?
+         result = []
+         dimension_key = dimensions.first
+         groups = query_result.group_by{ |point| point[result_key][dimension_key] }
+         groups.each do |dimension_value, dimension_points|
+           result += fill_empty_intervals(dimension_points, { dimension_key => dimension_value })
+         end
+         result
+       else
+         fill_empty_intervals(query_result)
+       end
+     end
+
+     def find_or_create_point(interval, points)
+       point = points.find{ |point| point['timestamp'].to_s.to_time == interval.to_time }
+       point.present? ? point : { 'timestamp' => interval.iso8601(3), result_key => {} }
+     end
+
+     def group_by?
+       query_type == 'groupBy'
+     end
+
+     def iso8601_duration_start_interval(duration)
+       duration.split('/').first.to_time.utc
+     end
+
+     def iso8601_duration_end_interval(duration)
+       duration.split('/').last.to_time.utc
+     end
+
+     def opts_for_query(opts)
+       opts.except(:fill_value, :broker)
+     end
+
+     def parse_range(range)
+       range.is_a?(Array) ? range.first : range
+     end
+
+     def parse_result_key(point)
+       @result_key = point['event'].present? ? 'event' : 'result'
+     end
+
+     # TODO: Can this be made smarter? Prefer to avoid case statements.
+     # Cases found here: http://druid.io/docs/latest/querying/granularities.html
+     def start_of_interval(time)
+       case granularity
+       when 'second'
+         time.change(usec: 0)
+       when 'minute'
+         time.beginning_of_minute
+       when 'fifteen_minute'
+         first_fifteen = [45, 30, 15, 0].detect{ |m| m <= time.min }
+         time.change(min: first_fifteen)
+       when 'thirty_minute'
+         first_thirty = [30, 0].detect{ |m| m <= time.min }
+         time.change(min: first_thirty)
+       when 'hour'
+         time.beginning_of_hour
+       when 'day'
+         time.beginning_of_day
+       when 'week'
+         time.beginning_of_week
+       when 'month'
+         time.beginning_of_month
+       when 'quarter'
+         time.beginning_of_quarter
+       when 'year'
+         time.beginning_of_year
+       else
+         time
+       end
+     end
+
+     class << self
+       def create(opts)
+         new(opts).execute
+       end
+     end
+   end
+ end
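
To make the gap-filling concrete, consider an hourly timeseries queried with `intervals: ['2017-01-01T00:00:00Z/2017-01-01T02:00:00Z']` and `fill_value: 0`; the timestamps and counts below are invented:

```ruby
# What the broker might return when the 01:00 bucket has no data:
sparse = [
  { 'timestamp' => '2017-01-01T00:00:00.000Z', 'result' => { 'views' => 12 } },
  { 'timestamp' => '2017-01-01T02:00:00.000Z', 'result' => { 'views' => 7 } }
]

# Druid::Query#execute walks every bucket from start_interval through
# end_interval and synthesizes the missing ones, so the caller receives:
# [
#   { 'timestamp' => '2017-01-01T00:00:00.000Z', 'result' => { 'views' => 12 } },
#   { 'timestamp' => '2017-01-01T01:00:00.000Z', 'result' => { 'views' => 0 } },
#   { 'timestamp' => '2017-01-01T02:00:00.000Z', 'result' => { 'views' => 7 } }
# ]
```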
data/lib/druid/version.rb ADDED
@@ -0,0 +1,3 @@
+ module RubyDruid
+   VERSION = '1.0.0'
+ end
data/lib/druid/writer.rb ADDED
@@ -0,0 +1,47 @@
+ #TODO: Seems to be a delay after shutting down Kafka and ZK updating
+ module Druid
+   class Writer
+     attr_reader :config, :producer, :zk
+     def initialize(config, zk)
+       @config = config
+       @zk = zk
+       init_producer
+       zk.register_listener(self, :handle_kafka_state_change)
+     end
+
+     def write_point(datasource, datapoint)
+       raise Druid::ConnectionError, 'no kafka brokers available' if producer.nil?
+       producer.produce(datapoint, topic: datasource)
+     end
+
+     private
+
+     def broker_list
+       zk.registry["/brokers/ids"].map{|instance| "#{instance[:host]}:#{instance[:port]}" }.join(',')
+     end
+
+     def handle_kafka_state_change(service)
+       if service == config.kafka_broker_path
+         producer.shutdown if producer
+         init_producer
+       end
+     end
+
+     def init_producer
+       producer_options = {
+         seed_brokers: broker_list,
+         client_id: "ruby-druid"
+       }
+
+       if broker_list.present?
+         kafka = Kafka.new(producer_options)
+         producer = kafka.async_producer(delivery_threshold: 100, delivery_interval: 10)
+         producer.deliver_messages
+       else
+         producer = nil
+       end
+
+       @producer = producer
+     end
+   end
+ end
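
In other words, a write is an asynchronous Kafka produce to a topic named after the datasource, and the Tranquility Kafka consumer on the Druid side performs the actual indexing. The datasource and payload here are illustrative:

```ruby
client.write_point('pageviews', { timestamp: Time.now.utc.iso8601, views: 1 }.to_json)
# Buffers the message for the "pageviews" topic on ruby-kafka's async producer.
# If ZooKeeper reports no Kafka brokers, the producer is nil and the call
# raises Druid::ConnectionError instead.
```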
data/lib/druid/zk.rb ADDED
@@ -0,0 +1,63 @@
+ module Druid
+   class ZK
+     attr_accessor :registry
+     attr_reader :client, :config, :listeners
+
+     #TODO: Test and handle ZK partitions
+     def initialize(config)
+       @client = ::ZK.new(config.zookeeper)
+       @config = config
+       @listeners = []
+       @registry = {}
+       register
+     end
+
+     def register_listener(object, method)
+       listeners << ->(*args) { object.send(method, *args) }
+     end
+
+     private
+
+     def announce(service)
+       # puts "announcing #{service}"
+       listeners.each { |listener| listener.call(service) }
+     end
+
+     def register
+       register_service("#{config.discovery_path}/druid:broker")
+       register_service("#{config.discovery_path}/druid:coordinator")
+       register_service("#{config.discovery_path}/druid:overlord")
+       register_service("#{config.kafka_broker_path}")
+     end
+
+     def register_service(service)
+       # puts "registering #{service}"
+       #TODO: Thread safety, lock this registry key
+       subscribe_to_service(service)
+       renew_service_instances(service)
+     end
+
+     def renew_service_instances(service)
+       # puts "activating registered subscriptions on #{service}"
+       instances = client.children(service, watch: true)
+
+       # puts "emptying #{service} from registry"
+       registry[service] = []
+       instances.each do |instance|
+         data = JSON.parse(client.get("#{service}/#{instance}").first)
+         host = data['address'] || data['host']
+         port = data['port']
+         # puts "adding #{host}:#{port} to registry for #{service}"
+         registry[service] << { host: host, port: port }
+       end
+     end
+
+     def subscribe_to_service(service)
+       subscription = client.register(service) do |event|
+         # puts "watched event for #{service} detected"
+         renew_service_instances(event.path)
+         announce(event.path)
+       end
+     end
+   end
+ end
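
For reference, after `register` has run, `registry` is a plain Hash keyed by the watched ZooKeeper paths, holding whatever instances are currently announced (hosts and ports below are invented):

```ruby
client.zk.registry
# => {
#      "/druid/discovery/druid:broker"      => [{ host: "broker-1",   port: 8082 }],
#      "/druid/discovery/druid:coordinator" => [{ host: "coord-1",    port: 8081 }],
#      "/druid/discovery/druid:overlord"    => [{ host: "overlord-1", port: 8090 }],
#      "/brokers/ids"                       => [{ host: "kafka-1",    port: 9092 }]
#    }
```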
data/lib/druiddb.rb ADDED
@@ -0,0 +1,21 @@
+ require "active_support/all"
+ require "ruby-kafka"
+ require "json"
+ require "zk"
+
+ require "druid/configuration"
+ require "druid/connection"
+ require "druid/errors"
+ require "druid/query"
+ require "druid/version"
+ require "druid/zk"
+
+ require "druid/node/broker"
+ require "druid/node/coordinator"
+ require "druid/node/overlord"
+
+ require "druid/queries/core"
+ require "druid/queries/task"
+
+ require "druid/writer"
+ require "druid/client"
metadata ADDED
@@ -0,0 +1,138 @@
+ --- !ruby/object:Gem::Specification
+ name: druiddb
+ version: !ruby/object:Gem::Version
+   version: 1.0.0
+ platform: ruby
+ authors:
+ - Andre LeBlanc
+ autorequire:
+ bindir: exe
+ cert_chain: []
+ date: 2017-07-07 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: activesupport
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.0'
+ - !ruby/object:Gem::Dependency
+   name: ruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0.3'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '0.3'
+ - !ruby/object:Gem::Dependency
+   name: zk
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.9'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.9'
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.7'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.7'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '10.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '10.0'
+ description: Ruby adapter for Druid that allows reads and writes using the Tranquility
+   Kafka API.
+ email:
+ - andre.leblanc88@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - ".gitignore"
+ - Gemfile
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - bin/console
+ - bin/setup
+ - druiddb.gemspec
+ - lib/druid/README.md
+ - lib/druid/client.rb
+ - lib/druid/configuration.rb
+ - lib/druid/connection.rb
+ - lib/druid/errors.rb
+ - lib/druid/node/broker.rb
+ - lib/druid/node/coordinator.rb
+ - lib/druid/node/overlord.rb
+ - lib/druid/queries/core.rb
+ - lib/druid/queries/task.rb
+ - lib/druid/query.rb
+ - lib/druid/version.rb
+ - lib/druid/writer.rb
+ - lib/druid/zk.rb
+ - lib/druiddb.rb
+ homepage: https://github.com/andremleblanc/ruby-druid
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.6.12
+ signing_key:
+ specification_version: 4
+ summary: Ruby adapter for Druid.
+ test_files: []