ruby-druid 0.1.9 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,223 +1,489 @@
1
- require 'druid/serializable'
1
+ require 'time'
2
+ require 'iso8601'
3
+
4
+ require 'active_support/all'
5
+ require 'active_model'
6
+
7
+ require 'druid/granularity'
8
+ require 'druid/aggregation'
9
+ require 'druid/post_aggregation'
2
10
  require 'druid/filter'
11
+ require 'druid/context'
3
12
  require 'druid/having'
4
- require 'druid/post_aggregation'
5
-
6
- require 'time'
7
- require 'json'
8
13
 
9
14
  module Druid
10
15
  class Query
16
+ include ActiveModel::Model
11
17
 
12
- attr_reader :properties
13
-
14
- def initialize(source, client = nil)
15
- @properties = {}
16
- @client = client
18
+ attr_accessor :queryType
19
+ validates :queryType, inclusion: { in: %w(timeseries search timeBoundary groupBy segmentMetadata select topN dataSourceMetadata) }
17
20
 
18
- # set some defaults
19
- data_source(source)
20
- granularity(:all)
21
+ attr_accessor :dataSource
22
+ validates :dataSource, presence: true
21
23
 
22
- interval(today)
24
# Validates the +intervals+ attribute: a non-empty Array of
# "<start>/<end>" strings where both parts are valid ISO8601 dates
# and the start lies strictly before the end.
class IntervalsValidator < ActiveModel::EachValidator
  def validate_each(record, attribute, value)
    if !value.is_a?(Array) || value.blank?
      record.errors.add(attribute, 'must be a list with at least one interval')
      return
    end
    value.each do |interval|
      parts = interval.to_s.split('/')
      record.errors.add(attribute, 'must consist of two ISO8601 dates separated by /') unless parts.length == 2
      parts = parts.map do |ts|
        ISO8601::DateTime.new(ts) rescue nil
      end
      unless parts.all?
        record.errors.add(attribute, 'must consist of valid ISO8601 dates')
        # guard: without this `next`, the comparison below called
        # `to_time` on nil and raised instead of reporting the error
        next
      end
      record.errors.add(attribute, 'first date needs to be < second date') unless parts.first.to_time < parts.last.to_time
    end
  end
end
24
41
 
25
- def today
26
- Time.now.to_date.to_time
42
+ attr_accessor :intervals
43
+ validates :intervals, intervals: true
44
+
45
# Validates +granularity+: for query types that support it, the value
# must be one of the simple granularity strings or a valid Granularity
# object; all other query types must not set it at all.
class GranularityValidator < ActiveModel::EachValidator
  TYPES = %w(timeseries search groupBy select topN)
  SIMPLE = %w(all none minute fifteen_minute thirty_minute hour day)

  def validate_each(record, attribute, value)
    unless TYPES.include?(record.queryType)
      record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
      return
    end
    case value
    when String
      record.errors.add(attribute, "must be one of #{SIMPLE.inspect}") unless SIMPLE.include?(value)
    when Granularity
      value.valid? # populate the nested object's errors
      value.errors.messages.each do |key, messages|
        record.errors.add(attribute, { key => messages })
      end
    else
      record.errors.add(attribute, "invalid type or class: #{value.inspect}")
    end
  end
end
28
65
 
29
- def send
30
- @client.send(self)
66
+ attr_accessor :granularity
67
+ validates :granularity, granularity: true
68
+
69
# Coerce a Hash into a Granularity object; strings and ready-made
# Granularity instances are stored unchanged.
def granularity=(value)
  @granularity = value.is_a?(Hash) ? Granularity.new(value) : value
end
32
78
 
33
- def query_type(type)
34
- @properties[:queryType] = type
35
- self
79
# Validates +dimensions+: a non-empty Array is required for
# groupBy/select queries; any other query type must not set it.
class DimensionsValidator < ActiveModel::EachValidator
  TYPES = %w(groupBy select)

  def validate_each(record, attribute, value)
    unless TYPES.include?(record.queryType)
      record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
      return
    end
    record.errors.add(attribute, 'must be a list with at least one dimension') if !value.is_a?(Array) || value.blank?
  end
end
37
-
38
- def get_query_type()
39
- @properties[:queryType] || :groupBy
89
+
90
+ attr_accessor :dimensions
91
+ validates :dimensions, dimensions: true
92
+
93
# Validates +aggregations+ for query types that accept them by running
# each Aggregation's own validations and copying any resulting error
# messages onto this record.
class AggregationsValidator < ActiveModel::EachValidator
  TYPES = %w(timeseries groupBy topN)

  def validate_each(record, attribute, value)
    if TYPES.include?(record.queryType)
      value.each(&:valid?) # populate nested errors
      value.each do |aggregation|
        aggregation.errors.messages.each do |key, messages|
          record.errors.add(attribute, { key => messages })
        end
      end
    else
      record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
    end
  end
end
41
108
 
42
- def data_source(source)
43
- source = source.split('/')
44
- @properties[:dataSource] = source.last
45
- @service = source.first
46
- self
109
+ attr_accessor :aggregations
110
+ validates :aggregations, aggregations: true
111
+
112
# Lazily default to an empty list so callers can push onto it.
def aggregations
  @aggregations ||= []
end

# Wrap each element of an Array in an Aggregation; any other value is
# stored as a single-element list as-is.
# NOTE(review): elements that are already Aggregation instances are
# re-wrapped via Aggregation.new — presumably Aggregation handles
# that; verify against its constructor.
def aggregations=(value)
  @aggregations =
    if value.is_a?(Array)
      value.map { |attrs| Aggregation.new(attrs) }
    else
      [value]
    end
end
52
125
 
53
- def group_by(*dimensions)
54
- query_type(:groupBy)
55
- @properties[:dimensions] = dimensions.flatten
56
- self
126
# Set of all aggregation type names used by this query.
def aggregation_types
  # use the #aggregations reader (defaults to []) instead of the raw
  # ivar, which is nil until first assignment and crashed here
  Set.new(aggregations.map(&:type).flatten.compact)
end

# Set of all field names referenced by this query's aggregations
# (covers both the single fieldName and the multi fieldNames forms).
def aggregation_names
  Set.new(aggregations.map do |aggregation|
    [aggregation.fieldName] + [aggregation.fieldNames]
  end.flatten.compact)
end
66
-
67
- def time_series(*aggregations)
68
- query_type(:timeseries)
69
- #@properties[:aggregations] = aggregations.flatten
70
- self
137
+
138
# Validates +postAggregations+ analogously to AggregationsValidator:
# run each nested object's validations and merge its errors in.
class PostaggregationsValidator < ActiveModel::EachValidator
  TYPES = %w(timeseries groupBy topN)

  def validate_each(record, attribute, value)
    if TYPES.include?(record.queryType)
      value.each(&:valid?) # populate nested errors
      value.each do |post_aggregation|
        # destructure the messages hash like the sibling validators;
        # iterating with a single block arg yielded raw [key, msgs]
        # pairs and produced malformed error entries
        post_aggregation.errors.messages.each do |key, messages|
          record.errors.add(attribute, { key => messages })
        end
      end
    else
      record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
    end
  end
end
72
153
 
73
- [:long_sum, :double_sum, :count].each do |method_name|
74
- agg_type = method_name.to_s.split('_')
75
- agg_type[1].capitalize! if agg_type.length > 1
76
- agg_type = agg_type.join
154
+ attr_accessor :postAggregations
155
+ validates :postAggregations, postaggregations: true
77
156
 
78
- define_method method_name do |*metrics|
79
- query_type(get_query_type())
80
- @properties[:aggregations] = [] if @properties[:aggregations].nil?
157
# Lazily default to an empty list so callers can push onto it.
def postAggregations
  @postAggregations ||= []
end

# Wrap each element of an Array in a PostAggregation; any other value
# is stored as a single-element list as-is.
def postAggregations=(value)
  @postAggregations =
    if value.is_a?(Array)
      value.map { |attrs| PostAggregation.new(attrs) }
    else
      [value]
    end
end
89
170
 
90
- self
171
# Validates +filter+ by running the nested Filter's own validations
# for supported query types; a filter set on an unsupported query
# type is rejected outright.
class FilterValidator < ActiveModel::EachValidator
  TYPES = %w(timeseries search groupBy select topN)

  def validate_each(record, attribute, value)
    if value && TYPES.include?(record.queryType)
      value.valid? # populate nested errors
      value.errors.messages.each do |key, messages|
        record.errors.add(attribute, { key => messages })
      end
    else
      record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
    end
  end
end
93
184
 
94
- alias_method :sum, :long_sum
185
+ attr_accessor :filter
186
+ validates :filter, filter: true
95
187
 
96
- def postagg(type=:long, &block)
97
- post_agg = PostAggregation.new.instance_exec(&block)
98
- @properties[:postAggregations] ||= []
99
- @properties[:postAggregations] << post_agg
188
# Coerce a Hash into a Filter object; anything else is stored as-is.
def filter=(value)
  @filter = value.is_a?(Hash) ? Filter.new(value) : value
end
100
195
 
101
- # make sure, the required fields are in the query
102
- field_type = (type.to_s + '_sum').to_sym
103
- # ugly workaround, because SOMEONE overwrote send
104
- sum_method = self.method(field_type)
105
- sum_method.call(post_agg.get_field_names)
196
+ # groupBy
197
+ attr_accessor :having
106
198
 
107
- self
199
# Coerce a Hash into a Having object; anything else is stored as-is.
def having=(value)
  @having = value.is_a?(Hash) ? Having.new(value) : value
end
109
206
 
110
- def postagg_double(&block)
111
- postagg(:double, &block)
112
- end
207
+ # groupBy
208
+ attr_accessor :limitSpec
113
209
 
114
- def filter(hash = nil, &block)
115
- if hash
116
- last = nil
117
- hash.each do |k,values|
118
- filter = FilterDimension.new(k).in(values)
119
- last = last ? last.&(filter) : filter
120
- end
121
- @properties[:filter] = @properties[:filter] ? @properties[:filter].&(last) : last
122
- end
123
- if block
124
- filter = Filter.new.instance_exec(&block)
125
- raise "Not a valid filter" unless filter.is_a? FilterParameter
126
- @properties[:filter] = @properties[:filter] ? @properties[:filter].&(filter) : filter
210
+ # search
211
+ attr_accessor :limit
212
+
213
+ # search
214
+ attr_accessor :searchDimensions
215
+
216
+ # search
217
+ attr_accessor :query
218
+
219
+ # search
220
+ attr_accessor :sort
221
+
222
+ # timeBoundary
223
+ attr_accessor :bound
224
+
225
+ # segmentMetadata
226
+ attr_accessor :toInclude
227
+
228
+ # segmentMetadata
229
+ attr_accessor :merge
230
+
231
+ # select
232
+ attr_accessor :metrics
233
+
234
+ # select
235
+ attr_accessor :pagingSpec
236
+
237
+ # topN
238
+ attr_accessor :dimension
239
+
240
+ # topN
241
+ attr_accessor :metric
242
+
243
+ # topN
244
+ attr_accessor :threshold
245
+
246
+ attr_accessor :context
247
+
248
# Coerce a Hash into a Context object; anything else is stored as-is.
def context=(value)
  @context = value.is_a?(Hash) ? Context.new(value) : value
end
130
255
 
131
- def interval(from, to = Time.now)
132
- intervals([[from, to]])
256
# Build a query from an attributes hash (handled by ActiveModel::Model)
# and guarantee that a Context is always present.
def initialize(attributes = {})
  super
  @context ||= Context.new
end
134
260
 
135
- def intervals(is)
136
- @properties[:intervals] = is.map{ |ii| mk_interval(ii[0], ii[1]) }
137
- self
261
# Serialize the query for Druid, stripping the ActiveModel
# bookkeeping attributes that are not part of the query payload.
def as_json(options = {})
  excluded = %w(errors validation_context)
  super(options.merge(except: excluded))
end
139
264
 
140
- def having(&block)
141
- having = Having.new.instance_exec(&block)
265
# True when an aggregation with the given output name already exists.
def contains_aggregation?(metric)
  target = metric.to_s
  aggregations.any? { |aggregation| aggregation.name.to_s == target }
end
268
+
269
# Fluent builder that assembles a Druid::Query step by step.
# Every public method returns +self+ so calls can be chained.
class Builder

  attr_reader :query

  # Start with a timeseries query over today's (UTC) interval.
  def initialize
    @query = Query.new
    query_type(:timeseries)
    interval(Time.now.utc.beginning_of_day)
  end

  def query_type(type)
    @query.queryType = type.to_s
    self
  end

  # Accepts "service/dataSource" style names; only the dataSource
  # component is sent to Druid.
  def data_source(source)
    @query.dataSource = source.split('/').last
    self
  end

  def interval(from, to = Time.now)
    intervals([[from, to]])
  end

  # Accepts a list of [from, to] pairs; each endpoint may be anything
  # responding to #iso8601 (Time/Date) or an ISO8601 string.
  def intervals(is)
    @query.intervals = is.map do |from, to|
      from = from.respond_to?(:iso8601) ? from.iso8601 : ISO8601::DateTime.new(from).to_s
      to = to.respond_to?(:iso8601) ? to.iso8601 : ISO8601::DateTime.new(to).to_s
      "#{from}/#{to}"
    end
    self
  end

  # Query the trailing +duration+ (e.g. 1.day) up to now.
  def last(duration)
    interval(Time.now - duration)
  end

  # Simple granularities are passed through as strings; anything else
  # becomes a period granularity in the given time zone.
  def granularity(gran, time_zone = "UTC")
    gran = gran.to_s
    # reuse the validator's whitelist instead of a second hard-coded
    # copy that could drift out of sync
    if GranularityValidator::SIMPLE.include?(gran)
      @query.granularity = gran
    else
      @query.granularity = Granularity.new({
        type: 'period',
        period: gran,
        timeZone: time_zone
      })
    end
    self
  end

  ## query types

  def metadata
    query_type(:segmentMetadata)
    @query.context.useCache = false
    @query.context.populateCache = false
    self
  end

  def timeseries
    query_type(:timeseries)
    self
  end

  def group_by(*dimensions)
    query_type(:groupBy)
    @query.dimensions = dimensions.flatten
    self
  end

  def topn(dimension, metric, threshold)
    query_type(:topN)
    @query.dimension = dimension
    @query.metric = metric
    @query.threshold = threshold
    self
  end

  def search(what = "", dimensions = [], limit = nil)
    query_type(:search)
    @query.searchDimensions = dimensions unless dimensions.empty?
    @query.limit = limit if limit
    # for now we always sort lexicographic
    @query.sort = { type: 'lexicographic' }
    @query.query = {
      type: "insensitive_contains",
      value: what
    }
    self
  end

  ### aggregations

  # One builder method per simple aggregator; the Druid type name is
  # the lowerCamelCase form of the method name.
  [:count, :long_sum, :double_sum, :min, :max, :hyper_unique].each do |method_name|
    define_method method_name do |*metrics|
      metrics.flatten.compact.each do |metric|
        @query.aggregations << Aggregation.new({
          type: method_name.to_s.camelize(:lower),
          name: metric,
          fieldName: metric,
        }) unless @query.contains_aggregation?(metric)
      end
      self
    end
  end

  def histograms(metrics)
    metrics.each { |m| histogram(m) }
    self
  end

  # Adds an approxHistogramFold aggregation plus the matching
  # post-aggregation that turns it into buckets of the given +type+.
  def histogram(metric, type = "equalBuckets", args = {})
    @query.aggregations << Aggregation.new({
      type: "approxHistogramFold",
      name: "raw_#{metric}",
      fieldName: metric,
    })
    type = type.dup
    type[0] = type[0].upcase
    options = args.dup.merge({
      name: metric,
      fieldName: "raw_#{metric}"
    })
    @query.postAggregations << ::Druid.const_get("PostAggregationHistogram#{type}").new(options)
    self
  end

  alias_method :sum, :long_sum

  def cardinality(metric, dimensions, by_row = false)
    @query.aggregations << Aggregation.new({
      type: 'cardinality',
      name: metric,
      fieldNames: dimensions,
      byRow: by_row,
    }) unless @query.contains_aggregation?(metric)
    self
  end

  # Adds a javascript aggregation; +functions+ must provide
  # :aggregate, :combine and :reset implementations.
  def js_aggregation(metric, columns, functions)
    @query.aggregations << Aggregation.new({
      type: 'javascript',
      name: metric,
      fieldNames: columns,
      fnAggregate: functions[:aggregate],
      fnCombine: functions[:combine],
      fnReset: functions[:reset],
    }) unless @query.contains_aggregation?(metric)
    self
  end

  ## post aggregations

  # Evaluate the DSL block into a post-aggregation and make sure the
  # fields it references are aggregated with the given sum type.
  def postagg(type = :long_sum, &block)
    post_agg = PostAggregation.new.instance_exec(&block)
    @query.postAggregations << post_agg
    # make sure, the required fields are in the query
    self.method(type).call(post_agg.field_names)
    self
  end

  ## filters

  def filter(hash = nil, type = :in, &block)
    filter_from_hash(hash, type) if hash
    filter_from_block(&block) if block
    self
  end

  # AND together one dimension filter per hash entry, then AND the
  # result onto any filter already present on the query.
  def filter_from_hash(hash, type = :in)
    last = nil
    hash.each do |k, values|
      filter = DimensionFilter.new(dimension: k).__send__(type, values)
      last = last ? last.&(filter) : filter
    end
    @query.filter = @query.filter ? @query.filter.&(last) : last
  end

  def filter_from_block(&block)
    filter = Filter.new.instance_exec(&block)
    @query.filter = @query.filter ? @query.filter.&(filter) : filter
  end

  ## having

  def having(hash = nil, &block)
    having_from_hash(hash) if hash
    having_from_block(&block) if block
    self
  end

  def having_from_block(&block)
    chain_having(Having.new.instance_exec(&block))
  end

  def having_from_hash(h)
    chain_having(Having.new(h))
  end

  # Chain onto an existing having clause when one is already set.
  def chain_having(having)
    having = @query.having.chain(having) if @query.having
    @query.having = having
    self
  end

  ### limit/sort

  # groupBy limitSpec: +columns+ maps dimension => direction.
  def limit(limit, columns)
    @query.limitSpec = {
      type: :default,
      limit: limit,
      columns: columns.map do |dimension, direction|
        { dimension: dimension, direction: direction }
      end
    }
    self
  end
end
221
- end
222
487
 
488
+ end
223
489
  end