ruby-druid 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjkxZmU4ODgzYmYwNzJhMTU1NDY4YjU1OGYzOTBhMmM2MzJjZDE2Ng==
5
+ data.tar.gz: !binary |-
6
+ ODQ3YWFlYThkNzQ0YWMwNjU3MTJjYjQ5Y2QzMjAxYTMyNTYwZjBiYQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZjVjOWJiZDEyNjA0ZDkwM2UzZTYyNDI5YTlkN2NkZjI5MmVjODBmNzg0Yjky
10
+ MGExM2JkOTM1OTliYTA3NmE3Y2VmOTk1ODhmZDI4NTUxYmMwOGQwYTZkZTRh
11
+ Njk1MzZhMTc5NmQ0OTQ4NmVjNjE4OWI0ZjE4M2M3Yzk4MDEwYmY=
12
+ data.tar.gz: !binary |-
13
+ MTkxZGFjYzAzNDk5NWZhY2FkM2E2ODYzNmY1MzkzZTE4M2U0MzFhMDViZWI0
14
+ YmNhOTZiNzlhZWFlYWI2ZDRiOWY4NGE2NmQ1Y2I3OTljNjIyNWEwNzczYzRl
15
+ MWZhZmQwZTA4OThiMjA4ODAyYjE0MDU0YzBjMzViY2I3ZTJjYzM=
@@ -0,0 +1,5 @@
1
+ Gemfile.lock
2
+ .driplrc
3
+ ruby-druid-0.0.1.gem
4
+ spec/test.json
5
+ tmp/rspec_guard_result
data/Gemfile ADDED
@@ -0,0 +1,21 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ group :test, :development do
6
+ gem 'guard'
7
+ gem 'guard-bundler'
8
+ gem 'guard-rspec'
9
+ gem 'rb-fsevent'
10
+ gem 'rspec'
11
+ gem 'ruby_gntp'
12
+ gem 'webmock'
13
+ gem 'debugger'
14
+ end
15
+
16
+ group :console do
17
+ gem 'activesupport'
18
+ gem 'awesome_print'
19
+ gem 'ripl'
20
+ gem 'terminal-table'
21
+ end
@@ -0,0 +1,10 @@
1
+ # More info at https://github.com/guard/guard#readme
2
+ guard :bundler do
3
+ watch('Gemfile')
4
+ end
5
+
6
+ guard :rspec, :cli => '--color --format nested' do
7
+ watch(%r{^spec/.+_spec\.rb$})
8
+ watch(%r{^(.+)\.rb$}) {|m| "spec/#{m[1]}_spec.rb" }
9
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
10
+ end
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 madvertise Mobile Advertising GmbH
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included
12
+ in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,286 @@
1
+ # ruby-druid
2
+
3
+ [![Code Climate](https://codeclimate.com/github/madvertise/ruby-druid.png)](https://codeclimate.com/github/madvertise/ruby-druid)
4
+
5
+ A ruby client for [druid](https://github.com/madvertise/druid).
6
+
7
+ ruby-druid generates complete JSON queries by chaining methods.
8
+ The resulting JSON can be sent directly to a druid server or handled separately.
9
+
10
+ ## bin/dripl
11
+
12
+ ruby-druid now includes a repl:
13
+
14
+ ```ruby
15
+ $ bin/dripl
16
+ >> metrics
17
+ [
18
+ [0] "actions"
19
+ ]
20
+
21
+ >> dimensions
22
+ [
23
+ [0] "actions"
24
+ ]
25
+
26
+ >> long_sum(:actions)
27
+ +---------+
28
+ | actions |
29
+ +---------+
30
+ | 98575 |
31
+ +---------+
32
+
33
+ >> long_sum(:actions)[-7.days].granularity(:day)
34
+ +-------------------------------+----------+
35
+ | timestamp | actions |
36
+ +-------------------------------+----------+
37
+ | 2013-03-28T00:00:00.000+01:00 | 93371 |
38
+ | 2013-03-29T00:00:00.000+01:00 | 448200 |
39
+ | 2013-03-30T00:00:00.000+01:00 | 117167 |
40
+ | 2013-03-31T00:00:00.000+01:00 | 828321 |
41
+ | 2013-04-01T00:00:00.000+02:00 | 261578 |
42
+ | 2013-04-02T00:00:00.000+02:00 | 05149 |
43
+ | 2013-04-03T00:00:00.000+02:00 | 27512 |
44
+ | 2013-04-04T00:00:00.000+02:00 | 18897 |
45
+ +-------------------------------+----------+
46
+
47
+ >> long_sum(:actions)[-7.days].granularity(:day).properties
48
+ {
49
+ :dataSource => "events",
50
+ :granularity => {
51
+ :type => "period",
52
+ :period => "P1D",
53
+ :timeZone => "Europe/Berlin"
54
+ },
55
+ :intervals => [
56
+ [0] "2013-03-28T00:00:00+01:00/2013-04-04T11:57:20+02:00"
57
+ ],
58
+ :queryType => :groupBy,
59
+ :aggregations => [
60
+ [0] {
61
+ :type => "longSum",
62
+ :name => :actions,
63
+ :fieldName => :actions
64
+ }
65
+ ]
66
+ }
67
+ ```
68
+
69
+ ## Getting started
70
+
71
+ In your Gemfile:
72
+
73
+ ```ruby
74
+ gem 'ruby-druid'
75
+ ```
76
+
77
+ In your code:
78
+
79
+ ```ruby
80
+ require 'druid'
81
+ ```
82
+
83
+ ## Usage
84
+
85
+ ```ruby
86
+ Druid::Client.new('zk1:2181,zk2:2181/druid').query('service/source')
87
+ ```
88
+
89
+ returns a query object on which all other methods can be called to create a full and valid druid query.
90
+
91
+ A query object can be sent like this:
92
+
93
+ ```ruby
94
+ Druid::Client.new('zk1:2181,zk2:2181/druid').query('service/source').send
95
+ #or
96
+ client = Druid::Client.new('zk1:2181,zk2:2181/druid')
97
+ query = Druid::Query.new('service/source')
98
+ client.send(query)
99
+ ```
100
+
101
+ The `send` method returns the parsed response from the druid server as an array.
102
+ If the response is not empty it contains one `ResponseRow` object for each row.
103
+ The timestamp can be retrieved by a method with the same name (e.g. `row.timestamp`),
104
+ all row values by hash-like syntax (e.g. `row['dimension']`).
105
+
106
+ ### group_by
107
+
108
+ Sets the dimensions to group the data.
109
+
110
+ `queryType` is set automatically to `groupBy`.
111
+
112
+
113
+ ```ruby
114
+ Druid::Query.new('service/source').group_by([:dimension1, :dimension2])
115
+ ```
116
+
117
+ ### long_sum
118
+
119
+ ```ruby
120
+ Druid::Query.new('service/source').long_sum([:aggregate1, :aggregate2])
121
+ ```
122
+
123
+ ### postagg
124
+
125
+ A simple syntax for post aggregations with +,-,/,* can be used like:
126
+
127
+ ```ruby
128
+ query = Druid::Query.new('service/source').long_sum([:aggregate1, :aggregate2])
129
+
130
+ query.postagg{(aggregate2 + aggregate2).as output_field_name}
131
+ ```
132
+
133
+ Required fields for the postaggregation are fetched automatically by the library.
134
+
135
+ ### interval
136
+
137
+ The interval for the query takes a string with date and time or objects that provide an `iso8601` method.
138
+
139
+ ```ruby
140
+ query = Druid::Query.new('service/source').long_sum(:aggregate1)
141
+
142
+ query.interval("2013-01-01T00", Time.now)
143
+ ```
144
+
145
+ ### granularity
146
+
147
+ granularity can be `:all`, `:none`, `:minute`, `:fifteen_minute`, `:thirty_minute`, `:hour` or `:day`.
148
+
149
+ It can also be a period granularity as described in https://github.com/metamx/druid/wiki/Granularities.
150
+
151
+ The period `'day'` or `:day` will be interpreted as `'P1D'`.
152
+
153
+ If a period granularity is specified, the (optional) second parameter is a time zone. It defaults
154
+ to the machine's local time zone.
155
+
156
+ For example:
157
+ ```ruby
158
+ query = Druid::Query.new('service/source').long_sum(:aggregate1)
159
+
160
+ query.granularity(:day)
161
+ ```
162
+
163
+ is (on my box) the same as
164
+
165
+ ```ruby
166
+ query = Druid::Query.new('service/source').long_sum(:aggregate1)
167
+
168
+ query.granularity('P1D', 'Europe/Berlin')
169
+ ```
170
+
171
+ ## having (for metrics)
172
+
173
+ ### having >
174
+
175
+ ```ruby
176
+ Druid::Query.new('service/source').having{metric > 10}
177
+ ```
178
+
179
+ ### having <
180
+
181
+ ```ruby
182
+ Druid::Query.new('service/source').having{metric < 10}
183
+ ```
184
+
185
+ ## filter (for dimensions)
186
+
187
+ Filters are set by the `filter` method. It takes a block or a hash as parameter.
188
+
189
+ Filters can be chained `filter{...}.filter{...}`
190
+
191
+ ### filter == , eq
192
+
193
+ ```ruby
194
+ Druid::Query.new('service/source').filter{dimension.eq 1}
195
+
196
+ #this is the same as
197
+
198
+ Druid::Query.new('service/source').filter{dimension == 1}
199
+ ```
200
+
201
+ ### filter != , neq
202
+
203
+ ```ruby
204
+ Druid::Query.new('service/source').filter{dimension.neq 1}
205
+
206
+ #this is the same as
207
+
208
+ Druid::Query.new('service/source').filter{dimension != 1}
209
+ ```
210
+
211
+ ### filter and
212
+
213
+ a logical and that can combine all other filters
214
+
215
+ ```ruby
216
+ Druid::Query.new('service/source').filter{dimension.neq 1 & dimension2.neq 2}
217
+ ```
218
+
219
+ ### filter or
220
+
221
+ a logical or that can combine all other filters
222
+
223
+ ```ruby
224
+ Druid::Query.new('service/source').filter{dimension.neq 1 | dimension2.neq 2}
225
+ ```
226
+
227
+ ### filter not
228
+
229
+ a logical not that can negate all other filters
230
+
231
+ ```ruby
232
+ Druid::Query.new('service/source').filter{!dimension.eq(1)}
233
+ ```
234
+
235
+ ### filter in
236
+
237
+ This filter creates a set of equals filters combined in an or filter (the dimension matches any of the given values).
238
+
239
+ ```ruby
240
+ Druid::Query.new('service/source').filter{dimension.in(1,2,3)}
241
+ ```
242
+
243
+ ### filter with hash syntax
244
+
245
+ sometimes it can be useful to use a hash syntax for filtering
246
+ for example if you already get them from a list or parameter hash
247
+
248
+ ```ruby
249
+ Druid::Query.new('service/source').filter(:dimension => 1, :dimension1 => 2, :dimension2 => 3)
250
+
251
+ #this is the same as
252
+
253
+ Druid::Query.new('service/source').filter{dimension.eq(1) & dimension1.eq(2) & dimension2.eq(3)}
254
+ ```
255
+
256
+ ### filter >, <, >=, <=
257
+
258
+ ```ruby
259
+ Druid::Query.new('service/source').filter{dimension >= 1}
260
+ ```
261
+
262
+ ### filter javascript
263
+
264
+ ```ruby
265
+ Druid::Query.new('service/source').filter{a.javascript('dimension >= 1 && dimension < 5')}
266
+
267
+ #this is also the same as
268
+
269
+ Druid::Query.new('service/source').filter{(dimension >= 1) & (dimension < 5)}
270
+ ```
271
+
272
+ ## Acknowledgements
273
+
274
+ Post aggregation expression parsing built with the help of [Squeel](https://github.com/ernie/squeel).
275
+
276
+ ## Contributions
277
+
278
+ ruby-druid is developed by madvertise Mobile Advertising GmbH
279
+
280
+ You can support us in different ways:
281
+
282
+ * Use ruby-druid, and let us know if you encounter anything that's broken or missing.
283
+ A failing spec is great. A pull request with your fix is even better!
284
+ * Spread the word about ruby-druid on Twitter, Facebook, and elsewhere.
285
+ * Work with us at madvertise on awesome stuff like this.
286
+ [Read the job description](http://madvertise.com/software-developer-ruby-fm-berlin) and send a mail to careers@madvertise.com.
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,40 @@
1
#!/usr/bin/env ruby

# dripl: starts the interactive Druid console.
#
# The script reads a `.driplrc` file located one directory above this
# script, evaluates it as Ruby against the top-level object (so the setter
# methods below act as a tiny configuration DSL), validates the result and
# hands it to Druid::Console.

require 'rubygems'
$:.unshift(File.join(File.expand_path("../..", __FILE__), 'lib'))

$0 = "dripl"

# --- .driplrc DSL -----------------------------------------------------------

# Stores the ZooKeeper connection string given in .driplrc.
def zookeeper(value)
  @zk_uri = value
end

# Deprecated alias for #zookeeper; prints a notice and delegates.
def uri(value)
  puts "using 'uri' in the config is deprecated, use 'zookeeper' instead"
  zookeeper value
end

# Stores the data source name given in .driplrc.
def source(value)
  @source = value
end

# Stores the options value given in .driplrc; the script below reads
# its :static_setup entry.
def options(value)
  @options = value
end

# --- load configuration -----------------------------------------------------

driplrc_path = File.join(File.expand_path("../..", __FILE__), '.driplrc')

begin
  driplrc = File.read(driplrc_path)
rescue SystemCallError, IOError
  # Only file-access failures mean "no usable .driplrc"; anything else is a
  # genuine bug and should surface with its own backtrace.
  puts "You need to create a .driplrc, take a look at dot_driplrc_example"
  exit 1
end

# Passing the path makes syntax/runtime errors inside .driplrc point at the
# config file instead of an anonymous "(eval)" location.
instance_eval(driplrc, driplrc_path)

# Either a ZooKeeper URI or a static setup is required to find a broker.
static_setup = @options && @options[:static_setup]
if !@zk_uri && !static_setup
  puts "Your .driplrc is incomplete, please fix"
  exit 1
end

require 'druid/console'
Druid::Console.new(@zk_uri, @source, @options)
@@ -0,0 +1,12 @@
1
+ ## your zookeeper config. For static scenarios (i.e. ssh tunnels) see options
2
+ ##
3
+ # zookeeper "localhost:2181/druid"
4
+
5
+ ## using options, you can disable zookeeper lookup
6
+ ## options[:static_setup], the key is the source name, the value is the broker's POST URI
7
+ ##
8
+ # options :static_setup => { 'example/events' => 'http://localhost:8080/druid/v2/' }
9
+
10
+ ## dripl will default to use the first available data source. use this to override
11
+ ##
12
+ # source "example/events"
@@ -0,0 +1,8 @@
1
+ require 'druid/client'
2
+ require 'druid/query'
3
+ require 'druid/response_row'
4
+ require 'druid/zoo_handler'
5
+
6
+ module Druid
7
+
8
+ end
@@ -0,0 +1,95 @@
1
require 'json'
require 'net/http'
require 'uri'

module Druid
  # Sends queries and metadata requests to Druid brokers over HTTP.
  #
  # Broker URIs are looked up per data source, either from a static map
  # (opts[:static_setup]) or from a ZooHandler built with the given
  # ZooKeeper URI. With opts[:fallback] the static map is kept as a backup
  # that is consulted when the primary lookup yields no usable URI.
  class Client
    # Request timeout in milliseconds (two minutes). The value is kept in
    # milliseconds so existing references to this constant are unaffected.
    TIMEOUT = 2 * 60 * 1000

    # Net::HTTP#read_timeout is expressed in seconds, so the millisecond
    # constant must be converted before being handed to Net::HTTP.
    READ_TIMEOUT_SECONDS = TIMEOUT / 1000

    # @param zookeeper_uri [String, nil] passed to ZooHandler; unused when a
    #   static setup without fallback is given
    # @param opts [Hash, nil] recognised keys in this class: :static_setup,
    #   :fallback, :zk_keepalive (the hash is also forwarded to ZooHandler)
    def initialize(zookeeper_uri, opts = nil)
      opts ||= {}

      if opts[:static_setup] && !opts[:fallback]
        @static = opts[:static_setup]
      else
        @backup = opts[:static_setup] if opts[:fallback]
        zookeeper_caching_management!(zookeeper_uri, opts)
      end
    end

    # Posts the query as JSON to the broker responsible for query.source.
    #
    # NOTE: this intentionally keeps the historical name even though it
    # shadows Object#send on Client instances.
    #
    # @param query [#source, #to_json]
    # @return [Array<ResponseRow>] one row object per element of the response
    # @raise [RuntimeError] if the data source is unknown or the broker does
    #   not answer with HTTP 200
    def send(query)
      uri = resolve_uri!(query.source)

      req = Net::HTTP::Post.new(uri.path, { 'Content-Type' => 'application/json' })
      req.body = query.to_json
      # Query dumps are diagnostics: only emit them in debug mode and keep
      # them off stdout so library users' output is not polluted.
      $stderr.puts(req.body) if $DEBUG

      response = request_with_timeout(uri, req)
      raise "Request failed: #{response.code}: #{response.body}" unless response.code == "200"

      JSON.parse(response.body).map { |row| ResponseRow.new(row) }
    end

    # Builds a Query bound to this client for the given data source.
    #
    # Without a block the Query is returned for further chaining. With a
    # block the freshly built query is sent immediately.
    # NOTE(review): the block is only used as a flag here; it is never
    # yielded to or passed on to Query -- confirm whether that is intended.
    #
    # @param id [String] data source name
    # @raise [RuntimeError] if the data source is unknown
    def query(id, &block)
      resolve_uri!(id)
      query = Query.new(id, self)
      return query unless block

      send query
    end

    # Creates the ZooHandler. Unless opts[:zk_keepalive] is set, the data
    # sources are read once, cached, and the handler is closed again.
    def zookeeper_caching_management!(zookeeper_uri, opts)
      @zk = ZooHandler.new(zookeeper_uri, opts)
      return if opts[:zk_keepalive]

      @cached_data_sources = @zk.data_sources
      @zk.close!
    end

    # @return [Object, nil] the cached data sources if present, otherwise
    #   whatever the ZooHandler reports; nil when no handler exists
    def ds
      @cached_data_sources || (@zk && @zk.data_sources)
    end

    # @return [Array] names of the known data sources (empty when neither
    #   ZooKeeper data nor a static setup is available)
    def data_sources
      primary_sources.keys
    end

    # Looks up the broker URI for a data source.
    #
    # The primary map (ZooKeeper data, else the static setup) is tried
    # first. If it has no entry, or the entry cannot be parsed as a URI, the
    # fallback setup (opts[:fallback]) is consulted.
    #
    # @param source [String] data source name
    # @return [URI, nil] nil when no usable URI is known
    def data_source_uri(source)
      parse_uri(primary_sources[source]) || parse_uri(backup_uri_for(source))
    end

    # Fetches the metadata document of a data source from its broker.
    #
    # @param source [String] data source name; only the part after the last
    #   "/" is used in the request path
    # @return [Object] the parsed JSON, extended with #dimensions and
    #   #metrics readers for the keys of the same name
    # @raise [RuntimeError] if the data source is unknown or the broker does
    #   not answer with HTTP 200
    def data_source(source)
      uri = resolve_uri!(source)

      meta_path = "#{uri.path}datasources/#{source.split('/').last}"
      response = request_with_timeout(uri, Net::HTTP::Get.new(meta_path))
      raise "Request failed: #{response.code}: #{response.body}" unless response.code == "200"

      meta = JSON.parse(response.body)
      meta.define_singleton_method(:dimensions) { self['dimensions'] }
      meta.define_singleton_method(:metrics) { self['metrics'] }
      meta
    end

    private

    # Map of data source name => broker URI string used for primary lookups.
    def primary_sources
      ds || @static || {}
    end

    # Returns the URI for +source+ or raises the shared "not available" error.
    def resolve_uri!(source)
      uri = data_source_uri(source)
      raise "data source #{source} (currently) not available" unless uri

      uri
    end

    # Parses +value+ into a URI; nil for missing or unparsable values.
    def parse_uri(value)
      return nil unless value

      URI(value)
    rescue URI::InvalidURIError, ArgumentError
      nil
    end

    # Fallback URI for +source+: the matching entry when the fallback setup
    # is a map, or the setup itself when a single URI was configured.
    def backup_uri_for(source)
      return nil unless @backup

      @backup.respond_to?(:key?) ? @backup[source] : @backup
    end

    # Performs +req+ against the broker at +uri+ with the read timeout applied.
    def request_with_timeout(uri, req)
      Net::HTTP.new(uri.host, uri.port).start do |http|
        http.read_timeout = READ_TIMEOUT_SECONDS
        http.request(req)
      end
    end
  end
end