claw_druid 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/array.rb +7 -0
  3. data/lib/claw_druid.rb +413 -0
  4. metadata +115 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f777ce0e3f8ad81341da2ff02e8fbd09ef4b8db8
4
+ data.tar.gz: 339d80e1f87b67e3845f4b1464f24bc65a36b812
5
+ SHA512:
6
+ metadata.gz: e7a460e6cb7ae7f578369e98b592bd5a691d0fc216eac65c9816ecf578b5b21603f89c70a3e5b3e707da1d31d5ab8fbe36b86baf92203f51aa15aa6ad9f1b719
7
+ data.tar.gz: 08730e5bb37f79f9f3452659e156802b2868e6008603e78f8a80142bf5d7e136b65bc604e77ef3190ed8efa9c5c51d034d6265cef1651431ea2393c0edb257d1
data/lib/array.rb ADDED
@@ -0,0 +1,7 @@
1
class Array
  # Removes from the receiver every element for which the block returns a
  # truthy value and returns the removed elements as a new array.
  #
  # The block is evaluated exactly once per element, so blocks with side
  # effects or internal state split the array consistently.
  #
  #   list = [1, 2, 3, 4]
  #   list.except { |v| v.even? } # => [2, 4]
  #   list                        # => [1, 3]
  #
  # Returns an Enumerator when called without a block.
  def except
    return to_enum(:except) unless block_given?

    removed, kept = partition { |value| yield(value) }
    replace(kept)
    removed
  end
end
data/lib/claw_druid.rb ADDED
@@ -0,0 +1,413 @@
1
+ require 'httparty'
2
+ require 'json'
3
+ require 'awesome_print'
4
+ require 'active_support/all'
5
+ require_relative './array'
6
+
7
# Chainable query builder and HTTP client for Druid.
#
# Builder methods (+select+, +group+, +where+, +order+, +limit+, ...) collect
# the query in @params and return +self+ so calls can be chained. The query is
# POSTed to the configured URL when the result is read through +query+,
# +to_s+, +to_a+, +each+ or +map+.
class ClawDruid
  include Enumerable

  # Default page size for paged queries (smaller when DEBUG is set).
  THRESHOLD = ENV["DEBUG"] ? 5 : 30

  # Comparison operator => having-spec type.
  OPERATIONS = {
    '<' => "lessThan",
    '>' => 'greaterThan',
    '=' => 'equalTo'
  }.freeze

  # JavaScript bodies folding one row into the running value; COLUMN is
  # replaced by the aggregated expression.
  FnAggregates = {
    "min" => "return Math.min(current, (COLUMN));",
    "max" => "return Math.max(current, (COLUMN));",
    "sum" => "return current + (COLUMN);"
  }.freeze

  # JavaScript bodies merging two partial results of the same aggregator.
  FnCombines = {
    "min" => "return Math.min(partialA, partialB);",
    "max" => "return Math.max(partialA, partialB);",
    "sum" => "return partialA + partialB;"
  }.freeze

  # JavaScript bodies returning the value each aggregator starts from.
  FnResets = {
    "min" => "return Number.POSITIVE_INFINITY;",
    "max" => "return Number.NEGATIVE_INFINITY;",
    "sum" => "return 0;"
  }.freeze

  Select             = "select"
  TopN               = "topN"
  GroupBy            = "groupBy"
  TimeSeries         = "timeseries"
  TimeBoundary       = "timeBoundary"
  SegmentMetaData    = "segmentMetadata"
  DataSourceMetaData = "dataSourceMetadata"

  # Properties that are sent to the server for each query type; everything
  # else in @params is dropped by #query.
  Permit_Properties = {
    Select             => [:queryType, :dataSource, :descending, :intervals, :granularity, :filter, :dimensions, :metrics, :pagingSpec, :context],
    TopN               => [:queryType, :dataSource, :intervals, :granularity, :filter, :aggregations, :postAggregations, :dimension, :threshold, :metric, :context],
    GroupBy            => [:queryType, :dataSource, :dimensions, :limitSpec, :having, :granularity, :filter, :aggregations, :postAggregations, :intervals, :context],
    TimeSeries         => [:queryType, :dataSource, :descending, :intervals, :granularity, :filter, :aggregations, :postAggregations, :context],
    TimeBoundary       => [:queryType, :dataSource, :bound, :filter, :context],
    SegmentMetaData    => [:queryType, :dataSource, :intervals, :toInclude, :merge, :context, :analysisTypes, :lenientAggregatorMerge],
    DataSourceMetaData => [:queryType, :dataSource, :context],
  }.freeze

  # Columns that combine aggregates arithmetically, e.g. "sum(a) // sum(b) as r".
  # The 'i' flag makes SUM, MAX, MIN and COUNT work as well.
  POST_COLUMN_PATTERN = /(sum|max|min|count).+[\+\-\*\/]/i

  # Columns that are a single aggregate call, e.g. "sum(a)".
  METHOD_COLUMN_PATTERN = /(sum|max|min|count)\(.+\)/i

  # params - Hash with :url (query endpoint), :source (dataSource name) and
  #          an optional :threshold (page size, defaults to THRESHOLD).
  def initialize(params = {})
    @url       = params[:url]
    @params    = { dataSource: params[:source], granularity: "all", queryType: Select }
    @threshold = params[:threshold] || THRESHOLD

    # The paging identifiers of every query. The key identifies the query
    # (see #paging_key), the value maps a page number to identifiers like
    # { "publisher_daily_report_2017-02-02T00:00:00.000Z_..." => 10 }.
    @paging_identifiers = {}
  end

  # Turns the query into a groupBy over the given dimensions. Hash entries
  # are treated as lookup dimensions (see #select_lookup). Accepts either a
  # list of arguments or one Array, which is not modified.
  def group(*dimensions)
    dimensions = dimensions[0] if dimensions.count == 1 && dimensions[0].is_a?(Array)

    @params[:queryType] = GroupBy

    # partition instead of a destructive filter: the caller's array stays intact.
    lookup_dimensions, plain_dimensions = dimensions.partition { |dimension| dimension.is_a?(Hash) }
    select_lookup(lookup_dimensions)

    unless plain_dimensions.empty?
      @params[:dimensions] ||= []
      @params[:dimensions] += plain_dimensions.map { |dimension| dimension.to_s.strip }
    end
    @params.delete(:metrics)
    self
  end

  # Adds columns to the query. Each column is routed by its form:
  # arithmetic over aggregates -> postAggregations, a single aggregate call
  # -> aggregations, a Hash -> lookup dimension, anything else -> metrics.
  # A single String such as 'sum(column_a) as sum_a, column_b' is split on ", ".
  def select(*columns)
    columns = columns[0].split(", ") if columns.count == 1 && columns[0].is_a?(String) && columns[0].include?(", ")
    columns = columns[0] if columns.count == 1 && columns[0].is_a?(Array)

    return self if columns.all? { |column| column.blank? }

    post_columns, columns = columns.partition { |column| column[POST_COLUMN_PATTERN] }
    unless post_columns.blank?
      # Append so that an earlier select call does not lose its post aggregations.
      @params[:postAggregations] ||= []
      @params[:postAggregations] += post_columns.map { |post_column| post_chain(post_column) }
    end

    method_columns, columns = columns.partition { |column| column.is_a?(String) && column[METHOD_COLUMN_PATTERN] }
    method_columns.each { |column| method_column(column) }

    lookup_columns, columns = columns.partition { |column| column.is_a?(Hash) }
    select_lookup(lookup_columns)

    unless columns.empty?
      @params[:metrics] ||= []
      @params[:metrics] += columns.map { |column| column.to_s.strip }
    end
    self
  end

  # Shared implementation of #min, #max and #sum.
  #
  # method  - :min, :max or :sum.
  # columns - list of columns or [column, name] pairs. A column containing
  #           " + ", " - ", " * " or " / " becomes a JavaScript aggregator
  #           over the referenced fields, otherwise a double<Method> aggregator.
  def meta_method(method, columns)
    columns = columns[0] if columns.count == 1 && columns[0].is_a?(Array)
    method_name = method.to_s

    switch_to_aggregating_query
    built = columns.map do |column, naming|
      naming ||= "#{method}(#{column})"
      expression = column.to_s
      if expression[/ [\+\-\*\/] /]
        fields = expression.split(/ [\+\-\*\/] /)
        aggregate = FnAggregates.fetch(method_name).gsub("COLUMN", expression)
        {
          type:        "javascript",
          name:        naming,
          fieldNames:  fields,
          fnAggregate: "function(current, #{fields.join(', ')}) { #{aggregate} }",
          # min/max must merge and start with their own operation, not with a sum.
          fnCombine:   "function(partialA, partialB) { #{FnCombines.fetch(method_name)} }",
          fnReset:     "function() { #{FnResets.fetch(method_name)} }"
        }
      else
        { type: "double#{method_name.capitalize}", name: naming, fieldName: column }
      end
    end

    @params[:aggregations] ||= []
    @params[:aggregations] += built
    @params[:aggregations].uniq!
    self
  end

  [:min, :max, :sum].each do |method|
    define_method(method) do |*columns|
      meta_method(method, columns)
    end
  end

  # Without arguments adds a row count aggregator; with columns adds one
  # cardinality aggregator per column, named "count(<column>)".
  def count(*columns)
    switch_to_aggregating_query
    @params[:aggregations] ||= []
    if columns.empty?
      @params[:aggregations] << { type: "count", name: "count" }
    else
      @params[:aggregations] += columns.map { |column| { type: "cardinality", name: "count(#{column})", fields: [column] } }
    end
    @params[:aggregations].uniq!
    self
  end

  # Adds filter conditions, combined with "and".
  #
  # Hash form:   where(begin_date: d1, end_date: d2, column: value_or_values)
  #              :begin_date/:end_date set the query interval, other keys
  #              become selector / in filters. The hash is not modified.
  # String form: where("a = ? and (b > ? or c in [1,2])", 1, 2)
  #              each " ?" is replaced by the next argument.
  def where(*conditions)
    filters =
      case conditions[0]
      when Hash   then hash_filters(conditions[0])
      when String then [where_chain(bind_placeholders(conditions[0], conditions[1..-1]))].compact
      end

    unless filters.blank?
      @params[:filter] ||= { type: "and", fields: [] }
      @params[:filter][:fields] += filters
    end
    self
  end

  # Orders the result. Accepts a Hash of column => direction, an Array of
  # columns or a list of columns; a direction containing "desc" sorts
  # descending, anything else ascending.
  def order(*columns)
    columns = columns[0] if columns[0].is_a?(Hash) || columns[0].is_a?(Array)

    if @params[:queryType] != GroupBy
      @params[:metric] ||= []
      @params[:metric] += columns.map { |column, _direction| column }
      @params[:descending] = columns.any? { |_column, direction| direction.to_s[/desc/] }
    end

    # Keep a limit that was set before the ordering.
    previous_limit = @params[:limitSpec] && @params[:limitSpec][:limit]
    @params[:limitSpec] = {
      type:    "default",
      limit:   previous_limit || 500000,
      columns: columns.map do |column, direction|
        {
          dimension:      column.to_s,
          direction:      direction.to_s[/desc/] ? "descending" : "ascending",
          dimensionOrder: "lexicographic"
        }
      end
    }
    self
  end

  # Limits the number of returned rows (limitSpec).
  def limit(limit_count)
    @params[:limitSpec] ||= {}
    @params[:limitSpec][:type] ||= "default"
    @params[:limitSpec][:limit] = limit_count
    self
  end

  # Turns the query into a topN with the given threshold. The first ordering
  # column set through #order becomes the metric; an ascending order is
  # expressed with an "inverted" metric spec. Any limitSpec is removed.
  def top(top_count)
    @params[:queryType] = TopN
    @params[:threshold] = top_count

    limit_spec = @params.delete(:limitSpec)
    # limitSpec has no :columns when only #limit was called.
    ordering = limit_spec && limit_spec[:columns] && limit_spec[:columns][0]
    if ordering
      name = ordering[:dimension]
      @params[:metric] = ordering[:direction] == "ascending" ? { type: "inverted", metric: name } : name
    end
    self
  end

  # Selects the 1-based page of a paged query. Pages before the requested
  # one are queried (once, results are cached per query) to obtain the
  # paging identifiers needed to continue.
  def page(page_count)
    if page_count == 1
      @params[:pagingSpec] = { pagingIdentifiers: {}, threshold: @threshold }
    elsif page_count > 1
      pages = (@paging_identifiers[paging_key(@params)] ||= { 0 => {} })

      if pages[page_count - 1].nil?
        (1..page_count - 1).each do |current_page|
          next unless pages[current_page].nil?
          # #query stores the identifiers returned for current_page in the cache.
          query(@params.merge(pagingSpec: { pagingIdentifiers: pages[current_page - 1], threshold: @threshold }), current_page)
        end
      end

      @params[:pagingSpec] = { pagingIdentifiers: pages[page_count - 1], threshold: @threshold }
    end
    self
  end

  # Sets the having clause, e.g. having('a = ? and b > ?', 1, 2). Each " ?"
  # is replaced by the next argument; the given string is not modified.
  # Anything but a String as first argument is ignored, as in #where.
  def having(*conditions)
    return self unless conditions[0].is_a?(String)

    havings = having_chain(bind_placeholders(conditions[0], conditions[1..-1]))
    @params[:having] = havings unless havings.blank?

    self
  end

  # POSTs the query and returns the response body as a String.
  #
  # params     - query Hash, reduced to the properties permitted for its type.
  # page_count - when given, the paging identifiers found in the response are
  #              cached as the starting point of the page after page_count.
  def query(params = @params, page_count = nil)
    params = permitted_params(params)
    if ENV['DEBUG']
      ap params
      puts params.to_json
    end
    result = HTTParty.post(@url, body: params.to_json, headers: { 'Content-Type' => 'application/json' })
    puts result.code if ENV['DEBUG']

    # Work on the raw body so the check does not depend on how the response
    # was parsed.
    body = result.body
    if page_count && body.to_s.include?("pagingIdentifiers")
      # The pagingIdentifiers is something like
      # { "publisher_daily_report_2017-03-01T00:00:00.000Z_..." => -10 }
      identifiers = JSON.parse(body)[0]["result"]["pagingIdentifiers"]
      key = paging_key(params)
      @paging_identifiers[key] ||= {}
      @paging_identifiers[key][page_count] = identifiers.transform_values { |offset| offset + 1 }
    end

    body
  end

  # Switches to a timeBoundary query.
  def time_boundary
    @params[:queryType] = TimeBoundary
    self
  end

  # Switches to a timeBoundary query bound to "maxTime".
  def max_time
    @params[:queryType] = TimeBoundary
    @params[:bound]     = "maxTime"
    self
  end

  # Switches to a timeBoundary query bound to "minTime".
  def min_time
    @params[:queryType] = TimeBoundary
    @params[:bound]     = "minTime"
    self
  end

  # Switches to a dataSourceMetadata query.
  def source_meta
    @params[:queryType] = DataSourceMetaData
    self
  end

  # Switches to a segmentMetadata query.
  def segment_meta
    @params[:queryType] = SegmentMetaData
    self
  end

  # Runs the query and returns the raw response body.
  def to_s
    query
  end

  # Runs the query and returns the "columns" of the first result for a
  # segmentMetadata query, otherwise the "events" of the first result.
  def to_a
    first = JSON.parse(query)[0]
    @params[:queryType] == SegmentMetaData ? first["columns"] : first["result"]["events"]
  end

  def each(&block)
    to_a.each(&block)
  end

  def map(&block)
    to_a.map(&block)
  end

  # Sends a GET request to the configured URL and returns the body.
  def get
    result = HTTParty.get(@url)
    puts result.code if ENV["DEBUG"]
    result.body
  end

  # Sends a DELETE request to the configured URL and returns the body.
  def delete
    result = HTTParty.delete(@url)
    puts result.code if ENV["DEBUG"]
    result.body
  end

  private

  # Aggregations are not part of the default select query: move to a
  # timeseries query unless a more specific type was already chosen.
  def switch_to_aggregating_query
    current = @params[:queryType]
    @params[:queryType] = TimeSeries if current.nil? || current == Select
  end

  # Returns params reduced to the properties permitted for its query type.
  def permitted_params(params)
    params.slice(*Permit_Properties.fetch(params[:queryType], []))
  end

  # Cache key of a query for @paging_identifiers: the permitted properties
  # without the paging position, so #page and #query agree on the key.
  def paging_key(params)
    permitted_params(params).reject { |key, _value| key == :pagingSpec }.hash
  end

  # Returns a copy of template with each " ?" replaced by the next value.
  def bind_placeholders(template, values)
    index = -1
    template.gsub(" ?") { " #{values[index += 1]}" }
  end

  # Builds the filters of the Hash form of #where and sets the interval when
  # a begin or end date is given. Works on a copy of the caller's hash.
  def hash_filters(conditions)
    conditions = conditions.dup
    begin_date = conditions.delete(:begin_date)
    end_date   = conditions.delete(:end_date)
    # Without dates an earlier interval must stay, not become "/".
    @params[:intervals] = ["#{begin_date}/#{end_date}"] if begin_date || end_date

    conditions.reject { |_column, values| values.blank? }.map do |column, values|
      if !values.is_a?(Array)
        { type: "selector", dimension: column, value: values }
      elsif values.count == 1
        { type: "selector", dimension: column, value: values[0] }
      else
        { type: "in", dimension: column, values: values }
      end
    end
  end

  # True when text starts with "(" and that bracket is closed by the last
  # character, i.e. the whole text is wrapped in one pair of brackets.
  def wrapped_in_brackets?(text)
    return false unless text.length >= 2 && text[0] == "(" && text[-1] == ")"

    depth = 0
    text.each_char.with_index do |char, index|
      depth += 1 if char == "("
      depth -= 1 if char == ")"
      return index == text.length - 1 if depth.zero?
    end
    false
  end

  # Removes pairs of brackets that wrap the whole text: "((a))" -> "a",
  # while "(a) and (b)" is left alone.
  def strip_outer_brackets(text)
    text = text[1..-2] while wrapped_in_brackets?(text)
    text
  end

  # Splits conditions on " or " (tried first) or " and " without breaking
  # bracketed groups. Returns [relation, parts] or nil when no split keeps
  # the brackets balanced.
  def split_on_relation(conditions)
    %w(or and).each do |relation|
      mark = " #{relation} "
      next unless conditions[mark]

      parts = conditions.split(mark)
      return [relation, parts] if check_brackets(parts)

      (parts.length - 2).downto(0) do |i|
        left  = parts[0..i].join(mark)
        right = parts[i + 1..-1].join(mark)
        return [relation, [left, right]] if check_brackets(left) && check_brackets(right)
      end
    end
    nil
  end

  # Parses a filter expression into a filter Hash; returns nil when the
  # expression cannot be parsed. Supported operators (surrounded by spaces):
  # = > >= < <= ~ (range as JSON array) regex in (JSON array).
  def where_chain(conditions)
    conditions = strip_outer_brackets(conditions)

    if conditions[/ (or|and) /]
      relation, parts = split_on_relation(conditions)
      return nil unless relation
      return { type: relation, fields: parts.map { |part| where_chain(part) }.compact }
    end

    column, op, value = conditions.split(/ (<|>|<=|>=|=|~|regex|in) /).map(&:strip)
    case op
    when "="     then { type: "selector", dimension: column, value: value }
    # Strict flags make > and < exclude the bound itself.
    when ">"     then { type: "bound", dimension: column, lower: value, ordering: "numeric", lowerStrict: true }
    when ">="    then { type: "bound", dimension: column, lower: value, ordering: "numeric", lowerStrict: false }
    when "<"     then { type: "bound", dimension: column, upper: value, ordering: "numeric", upperStrict: true }
    when "<="    then { type: "bound", dimension: column, upper: value, ordering: "numeric", upperStrict: false }
    when "~"
      range = JSON.parse(value)
      { type: "bound", dimension: column, lower: range[0], upper: range[1], ordering: "numeric" }
    when "regex" then { type: "regex", dimension: column, pattern: value.delete(%q("')) }
    when "in"    then { type: "in", dimension: column, values: JSON.parse(value) }
    end
  end

  # Parses a having expression into a having-spec Hash; returns nil when the
  # expression cannot be parsed. Supported operators: < > = (see OPERATIONS).
  def having_chain(conditions)
    conditions = strip_outer_brackets(conditions)

    if conditions[/ (or|and) /]
      relation, parts = split_on_relation(conditions)
      return nil unless relation
      return { type: relation, havingSpecs: parts.map { |part| having_chain(part) }.compact }
    end

    column, op, value = conditions.split(/( [<>=] )/).map(&:strip)
    return nil unless OPERATIONS.key?(op) && value

    { type: OPERATIONS[op], aggregation: column, value: value.to_f }
  end

  # Parses an arithmetic expression over aggregates into a post aggregation.
  # Operators are doubled and surrounded by spaces (" ++ ", " -- ", " ** ",
  # " // "); an optional " as name" names the result. Leaves are aggregate
  # calls (registered as aggregations), field names or numeric constants.
  def post_chain(sentences)
    sentences, naming = sentences.split(" as ")
    sentences = strip_outer_brackets(sentences.strip)

    if sentences[/ (\+\+|--|\*\*|\/\/) /]
      %w(+ - * /).each do |op|
        mark = " #{op * 2} "
        next unless sentences[mark]

        parts = sentences.split(mark)
        (parts.length - 2).downto(0) do |i|
          left  = parts[0..i].join(mark)
          right = parts[i + 1..-1].join(mark)
          if check_brackets(left) && check_brackets(right)
            return { type: "arithmetic", name: naming, fn: op, fields: [post_chain(left), post_chain(right)] }
          end
        end
      end
      return nil
    end

    if sentences =~ /\A-?\d+(\.\d+)?\z/
      { type: "constant", name: naming, value: sentences.include?(".") ? sentences.to_f : sentences.to_i }
    else
      # Refer to the aggregation by the exact name it was registered with.
      { type: "fieldAccess", name: naming, fieldName: method_column(sentences) || sentences }
    end
  end

  # Appends one lookup dimension per Hash in columns. Each Hash provides
  # :dimension, :output and :name (Symbol or String keys).
  def select_lookup(columns)
    return unless columns.present?

    @params[:dimensions] ||= []
    @params[:dimensions] += columns.map { |lookup|
      {
        type:               "lookup",
        dimension:          lookup[:dimension] || lookup["dimension"],
        outputName:         lookup[:output]    || lookup["output"],
        name:               lookup[:name]      || lookup["name"],
        retainMissingValue: true,
      }
    }
  end

  # Registers the aggregation described by an expression such as "sum(a)" or
  # "MAX(a + b) as m" and returns its name ("sum(a)", "max(a + b)").
  # Returns nil, registering nothing, when the expression is not an
  # aggregate call.
  def method_column(column)
    call = column.split(" as ")[0].to_s.match(/(sum|max|min|count)\((.*)\)/i)
    return nil unless call

    method = call[1].downcase
    # Only the call syntax is removed; the method name may be part of the
    # column name (e.g. "sum(summary)").
    inner = call[2].gsub(/[()]/, "").strip

    # Add the column to aggregations, which name is like sum(column), min(column), max(column), count(column)
    send(method, inner)
    "#{method}(#{inner})"
  end

  # True when every given sentence has as many "(" as ")".
  def check_brackets(*sentences)
    sentences.flatten.all? { |sentence| sentence.count("(") == sentence.count(")") }
  end
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: claw_druid
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Fan Jieqi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-05-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '12.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '12.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: httparty
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.14.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.14.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: json
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.7'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: awesome_print
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.7'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.7'
69
+ - !ruby/object:Gem::Dependency
70
+ name: activesupport
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 4.2.3
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 4.2.3
83
+ description: The ruby client of Druid.
84
+ email: fanjieqi@nibirutech.com
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files: []
88
+ files:
89
+ - lib/array.rb
90
+ - lib/claw_druid.rb
91
+ homepage: http://galileo.tap4fun.com/fanjieqi/claw_druid
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.6.11
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: The ruby client of Druid.
115
+ test_files: []