ruby-druid 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjkxZmU4ODgzYmYwNzJhMTU1NDY4YjU1OGYzOTBhMmM2MzJjZDE2Ng==
5
+ data.tar.gz: !binary |-
6
+ ODQ3YWFlYThkNzQ0YWMwNjU3MTJjYjQ5Y2QzMjAxYTMyNTYwZjBiYQ==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZjVjOWJiZDEyNjA0ZDkwM2UzZTYyNDI5YTlkN2NkZjI5MmVjODBmNzg0Yjky
10
+ MGExM2JkOTM1OTliYTA3NmE3Y2VmOTk1ODhmZDI4NTUxYmMwOGQwYTZkZTRh
11
+ Njk1MzZhMTc5NmQ0OTQ4NmVjNjE4OWI0ZjE4M2M3Yzk4MDEwYmY=
12
+ data.tar.gz: !binary |-
13
+ MTkxZGFjYzAzNDk5NWZhY2FkM2E2ODYzNmY1MzkzZTE4M2U0MzFhMDViZWI0
14
+ YmNhOTZiNzlhZWFlYWI2ZDRiOWY4NGE2NmQ1Y2I3OTljNjIyNWEwNzczYzRl
15
+ MWZhZmQwZTA4OThiMjA4ODAyYjE0MDU0YzBjMzViY2I3ZTJjYzM=
@@ -0,0 +1,5 @@
1
+ Gemfile.lock
2
+ .driplrc
3
+ ruby-druid-0.0.1.gem
4
+ spec/test.json
5
+ tmp/rspec_guard_result
data/Gemfile ADDED
@@ -0,0 +1,21 @@
# Bundler configuration for working on ruby-druid itself.
source 'https://rubygems.org'

# Pull in the dependencies declared by the gem's own gemspec.
gemspec

# Tooling for running and auto-running the spec suite.
group :test, :development do
  %w[
    guard
    guard-bundler
    guard-rspec
    rb-fsevent
    rspec
    ruby_gntp
    webmock
    debugger
  ].each { |name| gem name }
end

# NOTE(review): presumably the gems needed by the bin/dripl console -- confirm.
group :console do
  %w[activesupport awesome_print ripl terminal-table].each do |name|
    gem name
  end
end
@@ -0,0 +1,10 @@
# Guard configuration. More info at https://github.com/guard/guard#readme

# Watch the Gemfile with the bundler guard.
guard(:bundler) { watch('Gemfile') }

guard(:rspec, cli: '--color --format nested') do
  # A changed spec file is watched as-is.
  watch(%r{^spec/.+_spec\.rb$})

  # Any Ruby file maps to a spec at the same relative path under spec/.
  watch(%r{^(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end

  # Files under lib/ map to their counterpart under spec/lib/.
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/lib/#{match[1]}_spec.rb"
  end
end
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 madvertise Mobile Advertising GmbH
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included
12
+ in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,286 @@
1
+ # ruby-druid
2
+
3
+ [![Code Climate](https://codeclimate.com/github/madvertise/ruby-druid.png)](https://codeclimate.com/github/madvertise/ruby-druid)
4
+
5
+ A ruby client for [druid](https://github.com/madvertise/druid).
6
+
7
+ ruby-druid generates complete JSON queries by chaining methods.
8
+ The resulting JSON can be sent directly to a druid server or handled separately.
9
+
10
+ ## bin/dripl
11
+
12
+ ruby-druid now includes a repl:
13
+
14
+ ```ruby
15
+ $ bin/dripl
16
+ >> metrics
17
+ [
18
+ [0] "actions"
19
+ ]
20
+
21
+ >> dimensions
22
+ [
23
+ [0] "actions"
24
+ ]
25
+
26
+ >> long_sum(:actions)
27
+ +---------+
28
+ | actions |
29
+ +---------+
30
+ | 98575 |
31
+ +---------+
32
+
33
+ >> long_sum(:actions)[-7.days].granularity(:day)
34
+ +-------------------------------+----------+
35
+ | timestamp | actions |
36
+ +-------------------------------+----------+
37
+ | 2013-03-28T00:00:00.000+01:00 | 93371 |
38
+ | 2013-03-29T00:00:00.000+01:00 | 448200 |
39
+ | 2013-03-30T00:00:00.000+01:00 | 117167 |
40
+ | 2013-03-31T00:00:00.000+01:00 | 828321 |
41
+ | 2013-04-01T00:00:00.000+02:00 | 261578 |
42
+ | 2013-04-02T00:00:00.000+02:00 | 05149 |
43
+ | 2013-04-03T00:00:00.000+02:00 | 27512 |
44
+ | 2013-04-04T00:00:00.000+02:00 | 18897 |
45
+ +-------------------------------+----------+
46
+
47
+ >> long_sum(:actions)[-7.days].granularity(:day).properties
48
+ {
49
+ :dataSource => "events",
50
+ :granularity => {
51
+ :type => "period",
52
+ :period => "P1D",
53
+ :timeZone => "Europe/Berlin"
54
+ },
55
+ :intervals => [
56
+ [0] "2013-03-28T00:00:00+01:00/2013-04-04T11:57:20+02:00"
57
+ ],
58
+ :queryType => :groupBy,
59
+ :aggregations => [
60
+ [0] {
61
+ :type => "longSum",
62
+ :name => :actions,
63
+ :fieldName => :actions
64
+ }
65
+ ]
66
+ }
67
+ ```
68
+
69
+ ## Getting started
70
+
71
+ In your Gemfile:
72
+
73
+ ```ruby
74
+ gem 'ruby-druid'
75
+ ```
76
+
77
+ In your code:
78
+
79
+ ```ruby
80
+ require 'druid'
81
+ ```
82
+
83
+ ## Usage
84
+
85
+ ```ruby
86
+ Druid::Client.new('zk1:2181,zk2:2181/druid').query('service/source')
87
+ ```
88
+
89
+ returns a query object on which all other methods can be called to create a full and valid druid query.
90
+
91
+ A query object can be sent like this:
92
+
93
+ ```ruby
94
+ Druid::Client.new('zk1:2181,zk2:2181/druid').query('service/source').send
95
+ #or
96
+ client = Druid::Client.new('zk1:2181,zk2:2181/druid')
97
+ query = Druid::Query.new('service/source')
98
+ client.send(query)
99
+ ```
100
+
101
+ The `send` method returns the parsed response from the druid server as an array.
102
+ If the response is not empty it contains one `ResponseRow` object for each row.
103
+ The timestamp can be retrieved via a method of the same name (e.g. `row.timestamp`),
104
+ all row values via hash-like syntax (e.g. `row['dimension']`).
105
+
106
+ ### group_by
107
+
108
+ Sets the dimensions to group the data.
109
+
110
+ `queryType` is set automatically to `groupBy`.
111
+
112
+
113
+ ```ruby
114
+ Druid::Query.new('service/source').group_by([:dimension1, :dimension2])
115
+ ```
116
+
117
+ ### long_sum
118
+
119
+ ```ruby
120
+ Druid::Query.new('service/source').long_sum([:aggregate1, :aggregate2])
121
+ ```
122
+
123
+ ### postagg
124
+
125
+ A simple syntax for post aggregations with +,-,/,* can be used like:
126
+
127
+ ```ruby
128
+ query = Druid::Query.new('service/source').long_sum([:aggregate1, :aggregate2])
129
+
130
+ query.postagg{(aggregate2 + aggregate2).as output_field_name}
131
+ ```
132
+
133
+ Required fields for the postaggregation are fetched automatically by the library.
134
+
135
+ ### interval
136
+
137
+ The interval for the query takes a string with date and time or objects that provide an `iso8601` method.
138
+
139
+ ```ruby
140
+ query = Druid::Query.new('service/source').long_sum(:aggregate1)
141
+
142
+ query.interval("2013-01-01T00", Time.now)
143
+ ```
144
+
145
+ ### granularity
146
+
147
+ granularity can be `:all`, `:none`, `:minute`, `:fifteen_minute`, `:thirthy_minute`, `:hour` or `:day`.
148
+
149
+ It can also be a period granularity as described in https://github.com/metamx/druid/wiki/Granularities.
150
+
151
+ The period `'day'` or `:day` will be interpreted as `'P1D'`.
152
+
153
+ If a period granularity is specified, the (optional) second parameter is a time zone. It defaults
154
+ to the machine's local time zone.
155
+
156
+ For example:
157
+ ```ruby
158
+ query = Druid::Query.new('service/source').long_sum(:aggregate1)
159
+
160
+ query.granularity(:day)
161
+ ```
162
+
163
+ is (on my box) the same as
164
+
165
+ ```ruby
166
+ query = Druid::Query.new('service/source').long_sum(:aggregate1)
167
+
168
+ query.granularity('P1D', 'Europe/Berlin')
169
+ ```
170
+
171
+ ## having (for metrics)
172
+
173
+ ### having >
174
+
175
+ ```ruby
176
+ Druid::Query.new('service/source').having{metric > 10}
177
+ ```
178
+
179
+ ### having <
180
+
181
+ ```ruby
182
+ Druid::Query.new('service/source').having{metric < 10}
183
+ ```
184
+
185
+ ## filter (for dimensions)
186
+
187
+ Filters are set by the `filter` method. It takes a block or a hash as parameter.
188
+
189
+ Filters can be chained `filter{...}.filter{...}`
190
+
191
+ ### filter == , eq
192
+
193
+ ```ruby
194
+ Druid::Query.new('service/source').filter{dimension.eq 1}
195
+
196
+ #this is the same as
197
+
198
+ Druid::Query.new('service/source').filter{dimension == 1}
199
+ ```
200
+
201
+ ### filter != , neq
202
+
203
+ ```ruby
204
+ Druid::Query.new('service/source').filter{dimension.neq 1}
205
+
206
+ #this is the same as
207
+
208
+ Druid::Query.new('service/source').filter{dimension != 1}
209
+ ```
210
+
211
+ ### filter and
212
+
213
+ a logical and that can combine all other filters
214
+
215
+ ```ruby
216
+ Druid::Query.new('service/source').filter{dimension.neq(1) & dimension2.neq(2)}
217
+ ```
218
+
219
+ ### filter or
220
+
221
+ a logical or that can combine all other filters
222
+
223
+ ```ruby
224
+ Druid::Query.new('service/source').filter{dimension.neq(1) | dimension2.neq(2)}
225
+ ```
226
+
227
+ ### filter not
228
+
229
+ a logical not that can negate any other filter
230
+
231
+ ```ruby
232
+ Druid::Query.new('service/source').filter{!dimension.eq(1)}
233
+ ```
234
+
235
+ ### filter in
236
+
237
+ This filter creates a set of equals filters in an or filter.
238
+
239
+ ```ruby
240
+ Druid::Query.new('service/source').filter{dimension.in(1,2,3)}
241
+ ```
242
+
243
+ ### filter with hash syntax
244
+
245
+ sometimes it can be useful to use a hash syntax for filtering
246
+ for example if you already get them from a list or parameter hash
247
+
248
+ ```ruby
249
+ Druid::Query.new('service/source').filter(:dimension => 1, :dimension1 => 2, :dimension2 => 3)
250
+
251
+ #this is the same as
252
+
253
+ Druid::Query.new('service/source').filter{dimension.eq(1) & dimension1.eq(2) & dimension2.eq(3)}
254
+ ```
255
+
256
+ ### filter >, <, >=, <=
257
+
258
+ ```ruby
259
+ Druid::Query.new('service/source').filter{dimension >= 1}
260
+ ```
261
+
262
+ ### filter javascript
263
+
264
+ ```ruby
265
+ Druid::Query.new('service/source').filter{a.javascript('dimension >= 1 && dimension < 5')}
266
+
267
+ #this also the same as
268
+
269
+ Druid::Query.new('service/source').filter{(dimension >= 1) & (dimension < 5)}
270
+ ```
271
+
272
+ ## Acknowledgements
273
+
274
+ Post aggregation expression parsing built with the help of [Squeel](https://github.com/ernie/squeel).
275
+
276
+ ## Contributions
277
+
278
+ ruby-druid is developed by madvertise Mobile Advertising GmbH
279
+
280
+ You can support us in different ways:
281
+
282
+ * Use ruby-druid, and let us know if you encounter anything that's broken or missing.
283
+ A failing spec is great. A pull request with your fix is even better!
284
+ * Spread the word about ruby-druid on Twitter, Facebook, and elsewhere.
285
+ * Work with us at madvertise on awesome stuff like this.
286
+ [Read the job description](http://madvertise.com/software-developer-ruby-fm-berlin) and send a mail to careers@madvertise.com.
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,40 @@
#!/usr/bin/env ruby
#
# dripl: starts Druid::Console using the settings from the `.driplrc` file
# that lives next to the `bin` directory.
#
# The `.driplrc` file is plain Ruby evaluated against the top-level object;
# it configures the console by calling the `zookeeper`, `source` and `options`
# helpers defined below.

require 'rubygems'

project_root = File.expand_path("../..", __FILE__)
$:.unshift(File.join(project_root, 'lib'))

$0 = "dripl"

# Config helper: remembers the zookeeper URI passed to the console.
def zookeeper(value)
  @zk_uri = value
end

# Config helper: deprecated alias of `zookeeper`.
def uri(value)
  # Deprecation notices belong on stderr so they do not mix with console output.
  warn "using 'uri' in the config is deprecated, use 'zookeeper' instead"
  zookeeper value
end

# Config helper: remembers the data source passed to the console.
def source(value)
  @source = value
end

# Config helper: remembers the options hash passed to the console.
def options(value)
  @options = value
end

driplrc_path = File.join(project_root, '.driplrc')

begin
  driplrc = File.read(driplrc_path)
rescue StandardError => e
  # Report the underlying cause too (missing file, permissions, ...).
  abort "You need to create a .driplrc, take a look at dot_driplrc_example (#{e.message})"
end

# NOTE(review): the config is executed as Ruby code with full privileges;
# only run dripl with a .driplrc you trust.
# Passing the path makes errors inside the config point at the right file.
instance_eval(driplrc, driplrc_path)

# Either a zookeeper URI or a static setup is required to locate brokers.
unless @zk_uri || (@options && @options[:static_setup])
  abort "Your .driplrc is incomplete, please fix"
end

require 'druid/console'
Druid::Console.new(@zk_uri, @source, @options)
@@ -0,0 +1,12 @@
1
+ ## your zookeeper config. For static scenarios (i.e. ssh tunnels) see options
2
+ ##
3
+ # zookeeper "localhost:2181/druid"
4
+
5
+ ## using options, you can disable zookeeper lookup
6
+ ## options[:static_setup]: the key is the source name, the value is the broker's POST URI
7
+ ##
8
+ # options :static_setup => { 'example/events' => 'http://localhost:8080/druid/v2/' }
9
+
10
+ ## dripl will default to use the first available data source. use this to override
11
+ ##
12
+ # source "example/events"
@@ -0,0 +1,8 @@
1
+ require 'druid/client'
2
+ require 'druid/query'
3
+ require 'druid/response_row'
4
+ require 'druid/zoo_handler'
5
+
6
+ module Druid
7
+
8
+ end
@@ -0,0 +1,95 @@
require 'json'
require 'net/http'
require 'uri'

module Druid
  # Sends queries to druid over HTTP.
  #
  # The URI for each data source comes either from a static table
  # (`opts[:static_setup]`, a Hash of source name => URI string) or from a
  # ZooHandler built from the zookeeper URI.
  class Client
    # Legacy timeout value in milliseconds; kept so existing references to
    # the constant keep working.
    TIMEOUT = 2 * 60 * 1000

    # Net::HTTP#read_timeout is expressed in seconds, so the millisecond
    # value above must be converted before it is handed to Net::HTTP.
    READ_TIMEOUT_SECONDS = TIMEOUT / 1000

    # @param zookeeper_uri [String, nil] passed to ZooHandler unless a purely
    #   static setup is requested
    # @param opts [Hash, nil] recognised keys:
    #   :static_setup - source name => URI table (used exclusively unless
    #                   :fallback is set)
    #   :fallback     - keep :static_setup only as a backup and still use
    #                   zookeeper
    #   :zk_keepalive - keep the ZooHandler open instead of caching its
    #                   data sources once
    def initialize(zookeeper_uri, opts = nil)
      opts ||= {}

      if opts[:static_setup] && !opts[:fallback]
        @static = opts[:static_setup]
      else
        @backup = opts[:static_setup] if opts[:fallback]
        zookeeper_caching_management!(zookeeper_uri, opts)
      end
    end

    # POSTs the query as JSON to the URI of its data source.
    #
    # NOTE: this intentionally shadows Object#send; use __send__ for dynamic
    # dispatch on client instances.
    #
    # @param query [#source, #to_json]
    # @return [Array<ResponseRow>] one entry per element of the JSON response
    # @raise [RuntimeError] if the source is unknown or the response code is
    #   not 200
    def send(query)
      uri = data_source_uri(query.source)
      raise "data source #{query.source} (currently) not available" unless uri

      req = Net::HTTP::Post.new(uri.path, 'Content-Type' => 'application/json')
      req.body = query.to_json
      # Query dumps are debugging output; keep them off stdout by default.
      warn req.body if $DEBUG

      response = http_request(uri, req)

      if response.code == "200"
        JSON.parse(response.body).map { |row| ResponseRow.new(row) }
      else
        raise "Request failed: #{response.code}: #{response.body}"
      end
    end

    # Builds a Query for the given data source.
    #
    # Without a block the Query is returned; with a block the Query is sent
    # immediately and the response rows are returned.
    # NOTE(review): the block itself is never called here -- confirm whether
    # it was meant to configure the query before sending.
    #
    # @raise [RuntimeError] if the source is unknown
    def query(id, &block)
      uri = data_source_uri(id)
      raise "data source #{id} (currently) not available" unless uri
      query = Query.new(id, self)
      return query unless block

      send query
    end

    # Creates the ZooHandler. Unless opts[:zk_keepalive] is set, its data
    # sources are read once, cached, and the handler is closed.
    def zookeeper_caching_management!(zookeeper_uri, opts)
      @zk = ZooHandler.new(zookeeper_uri, opts)
      return if opts[:zk_keepalive]

      @cached_data_sources = @zk.data_sources
      @zk.close!
    end

    # @return [Hash, nil] the cached data sources, else the ZooHandler's
    #   current ones; nil when no ZooHandler exists (static setup)
    def ds
      @cached_data_sources || (@zk && @zk.data_sources)
    end

    # @return [Array] names of all known data sources
    def data_sources
      (ds.nil? ? @static : ds).keys
    end

    # Looks up the URI for a data source.
    #
    # If the registered URI cannot be parsed and a backup was configured
    # (:static_setup together with :fallback), the backup is used instead.
    # The backup may be a source => URI Hash or a single URI string.
    #
    # @return [URI, nil] nil when the source is unknown
    def data_source_uri(source)
      sources = ds.nil? ? @static : ds
      uri = sources && sources[source]
      return nil unless uri

      begin
        URI(uri)
      rescue URI::Error, ArgumentError
        backup = @backup.is_a?(Hash) ? @backup[source] : @backup
        URI(backup) if backup
      end
    end

    # GETs "<uri path>datasources/<last segment of source>" and parses the
    # JSON response.
    #
    # @return [Object] the parsed JSON, extended with `dimensions` and
    #   `metrics` reader methods for the keys of the same name
    # @raise [RuntimeError] if the source is unknown or the response code is
    #   not 200
    def data_source(source)
      uri = data_source_uri(source)
      raise "data source #{source} (currently) not available" unless uri

      meta_path = "#{uri.path}datasources/#{source.split('/').last}"
      response = http_request(uri, Net::HTTP::Get.new(meta_path))

      if response.code == "200"
        meta = JSON.parse(response.body)
        meta.define_singleton_method(:dimensions) { self['dimensions'] }
        meta.define_singleton_method(:metrics) { self['metrics'] }
        meta
      else
        raise "Request failed: #{response.code}: #{response.body}"
      end
    end

    private

    # Performs a single request against the host/port of +uri+, applying the
    # read timeout, and returns the Net::HTTP response.
    def http_request(uri, req)
      http = Net::HTTP.new(uri.host, uri.port)
      http.read_timeout = READ_TIMEOUT_SECONDS
      http.start { |connection| connection.request(req) }
    end
  end
end