ruby-druid 0.1.9 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,223 +1,489 @@
1
- require 'druid/serializable'
1
+ require 'time'
2
+ require 'iso8601'
3
+
4
+ require 'active_support/all'
5
+ require 'active_model'
6
+
7
+ require 'druid/granularity'
8
+ require 'druid/aggregation'
9
+ require 'druid/post_aggregation'
2
10
  require 'druid/filter'
11
+ require 'druid/context'
3
12
  require 'druid/having'
4
- require 'druid/post_aggregation'
5
-
6
- require 'time'
7
- require 'json'
8
13
 
9
14
  module Druid
10
15
  class Query
16
+ include ActiveModel::Model
11
17
 
12
- attr_reader :properties
13
-
14
- def initialize(source, client = nil)
15
- @properties = {}
16
- @client = client
18
+ attr_accessor :queryType
19
+ validates :queryType, inclusion: { in: %w(timeseries search timeBoundary groupBy segmentMetadata select topN dataSourceMetadata) }
17
20
 
18
- # set some defaults
19
- data_source(source)
20
- granularity(:all)
21
+ attr_accessor :dataSource
22
+ validates :dataSource, presence: true
21
23
 
22
- interval(today)
24
+ class IntervalsValidator < ActiveModel::EachValidator
25
+ def validate_each(record, attribute, value)
26
+ if !value.is_a?(Array) || value.blank?
27
+ record.errors.add(attribute, 'must be a list with at least one interval')
28
+ return
29
+ end
30
+ value.each do |interval|
31
+ parts = interval.to_s.split('/')
32
+ record.errors.add(attribute, 'must consist of two ISO8601 dates seperated by /') unless parts.length == 2
33
+ parts = parts.map do |ts|
34
+ ISO8601::DateTime.new(ts) rescue nil
35
+ end
36
+ record.errors.add(attribute, 'must consist of valid ISO8601 dates') unless parts.all?
37
+ record.errors.add(attribute, 'first date needs to be < second date') unless parts.first.to_time < parts.last.to_time
38
+ end
39
+ end
23
40
  end
24
41
 
25
- def today
26
- Time.now.to_date.to_time
42
+ attr_accessor :intervals
43
+ validates :intervals, intervals: true
44
+
45
+ class GranularityValidator < ActiveModel::EachValidator
46
+ TYPES = %w(timeseries search groupBy select topN)
47
+ SIMPLE = %w(all none minute fifteen_minute thirty_minute hour day)
48
+ def validate_each(record, attribute, value)
49
+ if TYPES.include?(record.queryType)
50
+ if value.is_a?(String)
51
+ record.errors.add(attribute, "must be one of #{SIMPLE.inspect}") unless SIMPLE.include?(value)
52
+ elsif value.is_a?(Granularity)
53
+ value.valid? # trigger validation
54
+ value.errors.messages.each do |k, v|
55
+ record.errors.add(attribute, { k => v })
56
+ end
57
+ else
58
+ record.errors.add(attribute, "invalid type or class: #{value.inspect}")
59
+ end
60
+ else
61
+ record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
62
+ end
63
+ end
27
64
  end
28
65
 
29
- def send
30
- @client.send(self)
66
+ attr_accessor :granularity
67
+ validates :granularity, granularity: true
68
+
69
+ def granularity=(value)
70
+ if value.is_a?(String)
71
+ @granularity = value
72
+ elsif value.is_a?(Hash)
73
+ @granularity = Granularity.new(value)
74
+ else
75
+ @granularity = value
76
+ end
31
77
  end
32
78
 
33
- def query_type(type)
34
- @properties[:queryType] = type
35
- self
79
+ class DimensionsValidator < ActiveModel::EachValidator
80
+ TYPES = %w(groupBy select)
81
+ def validate_each(record, attribute, value)
82
+ if TYPES.include?(record.queryType)
83
+ record.errors.add(attribute, 'must be a list with at least one dimension') if !value.is_a?(Array) || value.blank?
84
+ else
85
+ record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
86
+ end
87
+ end
36
88
  end
37
-
38
- def get_query_type()
39
- @properties[:queryType] || :groupBy
89
+
90
+ attr_accessor :dimensions
91
+ validates :dimensions, dimensions: true
92
+
93
+ class AggregationsValidator < ActiveModel::EachValidator
94
+ TYPES = %w(timeseries groupBy topN)
95
+ def validate_each(record, attribute, value)
96
+ if TYPES.include?(record.queryType)
97
+ value.each(&:valid?) # trigger validation
98
+ value.each do |avalue|
99
+ avalue.errors.messages.each do |k, v|
100
+ record.errors.add(attribute, { k => v })
101
+ end
102
+ end
103
+ else
104
+ record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
105
+ end
106
+ end
40
107
  end
41
108
 
42
- def data_source(source)
43
- source = source.split('/')
44
- @properties[:dataSource] = source.last
45
- @service = source.first
46
- self
109
+ attr_accessor :aggregations
110
+ validates :aggregations, aggregations: true
111
+
112
+ def aggregations
113
+ @aggregations ||= []
47
114
  end
48
115
 
49
- def source
50
- "#{@service}/#{@properties[:dataSource]}"
116
+ def aggregations=(value)
117
+ if value.is_a?(Array)
118
+ @aggregations = value.map do |x|
119
+ Aggregation.new(x)
120
+ end
121
+ else
122
+ @aggregations = [value]
123
+ end
51
124
  end
52
125
 
53
- def group_by(*dimensions)
54
- query_type(:groupBy)
55
- @properties[:dimensions] = dimensions.flatten
56
- self
126
+ def aggregation_types
127
+ Set.new(@aggregations.map do |aggregation|
128
+ aggregation.type
129
+ end.flatten.compact)
57
130
  end
58
131
 
59
- def topn(dimension, metric, threshold)
60
- query_type(:topN)
61
- @properties[:dimension] = dimension
62
- @properties[:metric] = metric
63
- @properties[:threshold] = threshold
64
- self
132
+ def aggregation_names
133
+ Set.new(@aggregations.map do |aggregation|
134
+ [aggregation.fieldName] + [aggregation.fieldNames]
135
+ end.flatten.compact)
65
136
  end
66
-
67
- def time_series(*aggregations)
68
- query_type(:timeseries)
69
- #@properties[:aggregations] = aggregations.flatten
70
- self
137
+
138
+ class PostaggregationsValidator < ActiveModel::EachValidator
139
+ TYPES = %w(timeseries groupBy topN)
140
+ def validate_each(record, attribute, value)
141
+ if TYPES.include?(record.queryType)
142
+ value.each(&:valid?) # trigger validation
143
+ value.each do |avalue|
144
+ avalue.errors.messages.each do |msg|
145
+ record.errors.add(attribute, msg)
146
+ end
147
+ end
148
+ else
149
+ record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
150
+ end
151
+ end
71
152
  end
72
153
 
73
- [:long_sum, :double_sum, :count].each do |method_name|
74
- agg_type = method_name.to_s.split('_')
75
- agg_type[1].capitalize! if agg_type.length > 1
76
- agg_type = agg_type.join
154
+ attr_accessor :postAggregations
155
+ validates :postAggregations, postaggregations: true
77
156
 
78
- define_method method_name do |*metrics|
79
- query_type(get_query_type())
80
- @properties[:aggregations] = [] if @properties[:aggregations].nil?
157
+ def postAggregations
158
+ @postAggregations ||= []
159
+ end
81
160
 
82
- metrics.flatten.each do |metric|
83
- @properties[:aggregations] << {
84
- :type => agg_type,
85
- :name => metric.to_s,
86
- :fieldName => metric.to_s
87
- } unless contains_aggregation?(metric)
161
+ def postAggregations=(value)
162
+ if value.is_a?(Array)
163
+ @postAggregations = value.map do |x|
164
+ PostAggregation.new(x)
88
165
  end
166
+ else
167
+ @postAggregations = [value]
168
+ end
169
+ end
89
170
 
90
- self
171
+ class FilterValidator < ActiveModel::EachValidator
172
+ TYPES = %w(timeseries search groupBy select topN)
173
+ def validate_each(record, attribute, value)
174
+ if value && TYPES.include?(record.queryType)
175
+ value.valid? # trigger validation
176
+ value.errors.messages.each do |k, v|
177
+ record.errors.add(attribute, { k => v })
178
+ end
179
+ else
180
+ record.errors.add(attribute, "is not supported by type=#{record.queryType}") if value
181
+ end
91
182
  end
92
183
  end
93
184
 
94
- alias_method :sum, :long_sum
185
+ attr_accessor :filter
186
+ validates :filter, filter: true
95
187
 
96
- def postagg(type=:long, &block)
97
- post_agg = PostAggregation.new.instance_exec(&block)
98
- @properties[:postAggregations] ||= []
99
- @properties[:postAggregations] << post_agg
188
+ def filter=(value)
189
+ if value.is_a?(Hash)
190
+ @filter = Filter.new(value)
191
+ else
192
+ @filter = value
193
+ end
194
+ end
100
195
 
101
- # make sure, the required fields are in the query
102
- field_type = (type.to_s + '_sum').to_sym
103
- # ugly workaround, because SOMEONE overwrote send
104
- sum_method = self.method(field_type)
105
- sum_method.call(post_agg.get_field_names)
196
+ # groupBy
197
+ attr_accessor :having
106
198
 
107
- self
199
+ def having=(value)
200
+ if value.is_a?(Hash)
201
+ @having = Having.new(value)
202
+ else
203
+ @having = value
204
+ end
108
205
  end
109
206
 
110
- def postagg_double(&block)
111
- postagg(:double, &block)
112
- end
207
+ # groupBy
208
+ attr_accessor :limitSpec
113
209
 
114
- def filter(hash = nil, &block)
115
- if hash
116
- last = nil
117
- hash.each do |k,values|
118
- filter = FilterDimension.new(k).in(values)
119
- last = last ? last.&(filter) : filter
120
- end
121
- @properties[:filter] = @properties[:filter] ? @properties[:filter].&(last) : last
122
- end
123
- if block
124
- filter = Filter.new.instance_exec(&block)
125
- raise "Not a valid filter" unless filter.is_a? FilterParameter
126
- @properties[:filter] = @properties[:filter] ? @properties[:filter].&(filter) : filter
210
+ # search
211
+ attr_accessor :limit
212
+
213
+ # search
214
+ attr_accessor :searchDimensions
215
+
216
+ # search
217
+ attr_accessor :query
218
+
219
+ # search
220
+ attr_accessor :sort
221
+
222
+ # timeBoundary
223
+ attr_accessor :bound
224
+
225
+ # segementMetadata
226
+ attr_accessor :toInclude
227
+
228
+ # segementMetadata
229
+ attr_accessor :merge
230
+
231
+ # select
232
+ attr_accessor :metrics
233
+
234
+ # select
235
+ attr_accessor :pagingSpec
236
+
237
+ # topN
238
+ attr_accessor :dimension
239
+
240
+ # topN
241
+ attr_accessor :metric
242
+
243
+ # topN
244
+ attr_accessor :threshold
245
+
246
+ attr_accessor :context
247
+
248
+ def context=(value)
249
+ if value.is_a?(Hash)
250
+ @context = Context.new(value)
251
+ else
252
+ @context = value
127
253
  end
128
- self
129
254
  end
130
255
 
131
- def interval(from, to = Time.now)
132
- intervals([[from, to]])
256
+ def initialize(attributes = {})
257
+ super
258
+ @context ||= Context.new
133
259
  end
134
260
 
135
- def intervals(is)
136
- @properties[:intervals] = is.map{ |ii| mk_interval(ii[0], ii[1]) }
137
- self
261
+ def as_json(options = {})
262
+ super(options.merge(except: %w(errors validation_context)))
138
263
  end
139
264
 
140
- def having(&block)
141
- having = Having.new.instance_exec(&block)
265
+ def contains_aggregation?(metric)
266
+ aggregations.any? { |a| a.name.to_s == metric.to_s }
267
+ end
268
+
269
+ class Builder
270
+
271
+ attr_reader :query
272
+
273
+ def initialize
274
+ @query = Query.new
275
+ query_type(:timeseries)
276
+ interval(Time.now.utc.beginning_of_day)
277
+ end
278
+
279
+ def query_type(type)
280
+ @query.queryType = type.to_s
281
+ self
282
+ end
283
+
284
+ def data_source(source)
285
+ @query.dataSource = source.split('/').last
286
+ self
287
+ end
288
+
289
+ def interval(from, to = Time.now)
290
+ intervals([[from, to]])
291
+ end
292
+
293
+ def intervals(is)
294
+ @query.intervals = is.map do |from, to|
295
+ from = from.respond_to?(:iso8601) ? from.iso8601 : ISO8601::DateTime.new(from).to_s
296
+ to = to.respond_to?(:iso8601) ? to.iso8601 : ISO8601::DateTime.new(to).to_s
297
+ "#{from}/#{to}"
298
+ end
299
+ self
300
+ end
142
301
 
143
- if old_having = @properties[:having]
144
- if old_having.operator? && old_having.and?
145
- new_having = old_having
302
+ def last(duration)
303
+ interval(Time.now - duration)
304
+ end
305
+
306
+ def granularity(gran, time_zone = "UTC")
307
+ gran = gran.to_s
308
+ if %w(all none minute fifteen_minute thirty_minute hour day).include?(gran)
309
+ @query.granularity = gran
146
310
  else
147
- new_having = HavingOperator.new('and')
148
- new_having.add(old_having)
311
+ @query.granularity = Granularity.new({
312
+ type: 'period',
313
+ period: gran,
314
+ timeZone: time_zone
315
+ })
149
316
  end
150
- new_having.add(having)
151
- else
152
- new_having = having
317
+ self
153
318
  end
154
319
 
155
- @properties[:having] = new_having
156
- self
157
- end
320
+ ## query types
158
321
 
159
- alias_method :[], :interval
322
+ def metadata
323
+ query_type(:segmentMetadata)
324
+ @query.context.useCache = false
325
+ @query.context.populateCache = false
326
+ self
327
+ end
160
328
 
161
- def granularity(gran, time_zone = nil)
162
- gran = gran.to_s
163
- case gran
164
- when 'none', 'all', 'second', 'minute', 'fifteen_minute', 'thirty_minute', 'hour'
165
- @properties[:granularity] = gran
166
- return self
167
- when 'day'
168
- gran = 'P1D'
329
+ def timeseries
330
+ query_type(:timeseries)
331
+ self
169
332
  end
170
333
 
171
- time_zone ||= Time.now.strftime('%Z')
172
- # druid doesn't seem to understand 'CEST'
173
- # this is a work around
174
- time_zone = 'Europe/Berlin' if time_zone == 'CEST'
334
+ def group_by(*dimensions)
335
+ query_type(:groupBy)
336
+ @query.dimensions = dimensions.flatten
337
+ self
338
+ end
175
339
 
176
- @properties[:granularity] = {
177
- :type => 'period',
178
- :period => gran,
179
- :timeZone => time_zone
180
- }
181
- self
182
- end
340
+ def topn(dimension, metric, threshold)
341
+ query_type(:topN)
342
+ @query.dimension = dimension
343
+ @query.metric = metric
344
+ @query.threshold = threshold
345
+ self
346
+ end
183
347
 
184
- def to_json
185
- @properties.to_json
186
- end
348
+ def search(what = "", dimensions = [], limit = nil)
349
+ query_type(:search)
350
+ @query.searchDimensions = dimensions unless dimensions.empty?
351
+ @query.limit = limit if limit
352
+ # for now we always sort lexicographic
353
+ @query.sort = { type: 'lexicographic' }
354
+ @query.query = {
355
+ type: "insensitive_contains",
356
+ value: what
357
+ }
358
+ self
359
+ end
187
360
 
188
- def limit_spec(limit, columns)
189
- @properties[:limitSpec] = {
190
- :type => :default,
191
- :limit => limit,
192
- :columns => order_by_column_spec(columns)
193
- }
194
- self
195
- end
361
+ ### aggregations
362
+ [:count, :long_sum, :double_sum, :min, :max, :hyper_unique].each do |method_name|
363
+ define_method method_name do |*metrics|
364
+ metrics.flatten.compact.each do |metric|
365
+ @query.aggregations << Aggregation.new({
366
+ type: method_name.to_s.camelize(:lower),
367
+ name: metric,
368
+ fieldName: metric,
369
+ }) unless @query.contains_aggregation?(metric)
370
+ end
371
+ self
372
+ end
373
+ end
196
374
 
197
- private
375
+ def histograms(metrics)
376
+ metrics.each{|m| histogram(m) }
377
+ self
378
+ end
198
379
 
199
- def order_by_column_spec(columns)
200
- columns.map do |dimension, direction|
201
- {
202
- :dimension => dimension,
203
- :direction => direction
204
- }
380
+ def histogram(metric, type = "equalBuckets", args = {})
381
+ @query.aggregations << Aggregation.new({
382
+ type: "approxHistogramFold",
383
+ name: "raw_#{metric}",
384
+ fieldName: metric,
385
+ })
386
+ type = type.dup
387
+ type[0] = type[0].upcase
388
+ options = args.dup.merge({
389
+ name: metric,
390
+ fieldName: "raw_#{metric}"
391
+ })
392
+ @query.postAggregations << ::Druid.const_get("PostAggregationHistogram#{type}").new(options)
393
+ self
205
394
  end
206
- end
207
395
 
208
- def mk_interval(from, to)
209
- from = today + from if from.is_a?(Fixnum)
210
- to = today + to if to.is_a?(Fixnum)
396
+ alias_method :sum, :long_sum
211
397
 
212
- from = DateTime.parse(from.to_s) unless from.respond_to? :iso8601
213
- to = DateTime.parse(to.to_s) unless to.respond_to? :iso8601
214
- "#{from.iso8601}/#{to.iso8601}"
215
- end
398
+ def cardinality(metric, dimensions, by_row = false)
399
+ @query.aggregations << Aggregation.new({
400
+ type: 'cardinality',
401
+ name: metric,
402
+ fieldNames: dimensions,
403
+ byRow: by_row,
404
+ }) unless @query.contains_aggregation?(metric)
405
+ self
406
+ end
216
407
 
217
- def contains_aggregation?(metric)
218
- return false if @properties[:aggregations].nil?
219
- @properties[:aggregations].index { |aggregation| aggregation[:fieldName] == metric.to_s }
408
+ def js_aggregation(metric, columns, functions)
409
+ @query.aggregations << Aggregation.new({
410
+ type: 'javascript',
411
+ name: metric,
412
+ fieldNames: columns,
413
+ fnAggregate: functions[:aggregate],
414
+ fnCombine: functions[:combine],
415
+ fnReset: functions[:reset],
416
+ }) unless @query.contains_aggregation?(metric)
417
+ self
418
+ end
419
+
420
+ ## post aggregations
421
+
422
+ def postagg(type = :long_sum, &block)
423
+ post_agg = PostAggregation.new.instance_exec(&block)
424
+ @query.postAggregations << post_agg
425
+ # make sure, the required fields are in the query
426
+ self.method(type).call(post_agg.field_names)
427
+ self
428
+ end
429
+
430
+ ## filters
431
+
432
+ def filter(hash = nil, type = :in, &block)
433
+ filter_from_hash(hash, type) if hash
434
+ filter_from_block(&block) if block
435
+ self
436
+ end
437
+
438
+ def filter_from_hash(hash, type = :in)
439
+ last = nil
440
+ hash.each do |k, values|
441
+ filter = DimensionFilter.new(dimension: k).__send__(type, values)
442
+ last = last ? last.&(filter) : filter
443
+ end
444
+ @query.filter = @query.filter ? @query.filter.&(last) : last
445
+ end
446
+
447
+ def filter_from_block(&block)
448
+ filter = Filter.new.instance_exec(&block)
449
+ @query.filter = @query.filter ? @query.filter.&(filter) : filter
450
+ end
451
+
452
+ ## having
453
+
454
+ def having(hash = nil, &block)
455
+ having_from_hash(hash) if hash
456
+ having_from_block(&block) if block
457
+ self
458
+ end
459
+
460
+ def having_from_block(&block)
461
+ chain_having(Having.new.instance_exec(&block))
462
+ end
463
+
464
+ def having_from_hash(h)
465
+ chain_having(Having.new(h))
466
+ end
467
+
468
+ def chain_having(having)
469
+ having = @query.having.chain(having) if @query.having
470
+ @query.having = having
471
+ self
472
+ end
473
+
474
+ ### limit/sort
475
+
476
+ def limit(limit, columns)
477
+ @query.limitSpec = {
478
+ type: :default,
479
+ limit: limit,
480
+ columns: columns.map do |dimension, direction|
481
+ { dimension: dimension, direction: direction }
482
+ end
483
+ }
484
+ self
485
+ end
220
486
  end
221
- end
222
487
 
488
+ end
223
489
  end