claw_druid 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/lib/array.rb +7 -0
  3. data/lib/claw_druid.rb +413 -0
  4. metadata +115 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f777ce0e3f8ad81341da2ff02e8fbd09ef4b8db8
4
+ data.tar.gz: 339d80e1f87b67e3845f4b1464f24bc65a36b812
5
+ SHA512:
6
+ metadata.gz: e7a460e6cb7ae7f578369e98b592bd5a691d0fc216eac65c9816ecf578b5b21603f89c70a3e5b3e707da1d31d5ab8fbe36b86baf92203f51aa15aa6ad9f1b719
7
+ data.tar.gz: 08730e5bb37f79f9f3452659e156802b2868e6008603e78f8a80142bf5d7e136b65bc604e77ef3190ed8efa9c5c51d034d6265cef1651431ea2393c0edb257d1
data/lib/array.rb ADDED
@@ -0,0 +1,7 @@
1
# Core extension used by ClawDruid to pull matching entries out of an argument list.
class Array
  # Removes every element for which the block returns a truthy value and
  # returns the removed elements (in their original order). The receiver is
  # modified in place and keeps only the elements the block rejected.
  #
  # The block is evaluated exactly once per element, so blocks with side
  # effects (or non-deterministic results) cannot make the returned list and
  # the remaining list disagree.
  #
  #   list = [1, 2, 3, 4]
  #   list.except { |n| n.even? }  # => [2, 4]
  #   list                         # => [1, 3]
  def except
    removed, kept = partition { |value| yield(value) }
    replace(kept)
    removed
  end
end
data/lib/claw_druid.rb ADDED
@@ -0,0 +1,413 @@
1
+ require 'httparty'
2
+ require 'json'
3
+ require 'awesome_print'
4
+ require 'active_support/all'
5
+ require_relative './array'
6
+
7
# Chainable query builder and HTTP client for a Druid query endpoint.
#
# Builder methods (+select+, +group+, +where+, +sum+, ...) accumulate the JSON
# query in @params and return +self+ so calls can be chained. +query+ posts the
# accumulated query to the configured URL and returns the raw response body.
#
#   ClawDruid.new(url: url, source: "events").group(:country).sum(:revenue).query
#
# The class relies on ActiveSupport (+blank?+, +present?+, +Hash#slice+,
# +Hash#transform_values+), HTTParty and, in DEBUG mode, awesome_print, all of
# which are required at the top of the file.
class ClawDruid
  include Enumerable

  # Default page size for paged select queries (smaller when DEBUG is set).
  THRESHOLD = ENV["DEBUG"] ? 5 : 30

  # Comparison operator => Druid "having" spec type.
  OPERATIONS = {
    '<' => "lessThan",
    '>' => 'greaterThan',
    '=' => 'equalTo'
  }.freeze

  # JavaScript aggregator bodies; COLUMN is replaced with the column expression.
  FnAggregates = {
    "min" => "return Math.min(current, (COLUMN));",
    "max" => "return Math.max(current, (COLUMN));",
    "sum" => "return current + (COLUMN);"
  }.freeze

  # JavaScript bodies that merge two partial results of the same aggregate.
  FnCombines = {
    "min" => "return Math.min(partialA, partialB);",
    "max" => "return Math.max(partialA, partialB);",
    "sum" => "return partialA + partialB;"
  }.freeze

  # JavaScript bodies that produce the neutral starting value of an aggregate.
  FnResets = {
    "min" => "return Number.POSITIVE_INFINITY;",
    "max" => "return Number.NEGATIVE_INFINITY;",
    "sum" => "return 0;"
  }.freeze

  Select             = "select"
  TopN               = "topN"
  GroupBy            = "groupBy"
  TimeSeries         = "timeseries"
  TimeBoundary       = "timeBoundary"
  SegmentMetaData    = "segmentMetadata"
  DataSourceMetaData = "dataSourceMetadata"

  # Query type => properties that are sent to Druid for that type. Everything
  # else accumulated in @params is dropped by #query before posting.
  Permit_Properties = {
    Select             => [:queryType, :dataSource, :intervals, :descending, :filter, :dimensions, :metrics, :granularity, :pagingSpec, :context],
    TopN               => [:queryType, :dataSource, :intervals, :granularity, :filter, :aggregations, :postAggregations, :dimension, :threshold, :metric, :context],
    GroupBy            => [:queryType, :dataSource, :dimensions, :limitSpec, :having, :granularity, :filter, :aggregations, :postAggregations, :intervals, :context],
    TimeSeries         => [:queryType, :dataSource, :descending, :intervals, :granularity, :filter, :aggregations, :postAggregations, :context],
    TimeBoundary       => [:queryType, :dataSource, :bound, :filter, :context],
    SegmentMetaData    => [:queryType, :dataSource, :intervals, :toInclude, :merge, :context, :analysisTypes, :lenientAggregatorMerge],
    DataSourceMetaData => [:queryType, :dataSource, :context],
  }.freeze

  # params - Hash with :url (endpoint the query is posted to), :source (Druid
  #          dataSource) and optional :threshold (page size, default THRESHOLD).
  def initialize(params = {})
    @url       = params[:url]
    @params    = { dataSource: params[:source], granularity: "all", queryType: Select }
    @threshold = params[:threshold] || THRESHOLD

    # Paging identifiers per query. The key is the hash of the query (without
    # its pagingSpec), the value maps a page number to identifiers such as
    # { "publisher_daily_report_2017-02-02T00:00:00.000Z_2017-02-04T00:00:00.000Z_2017-03-30T12:10:27.053Z" => 10 }.
    @paging_identifiers = {}
  end

  # Switches to a groupBy query and appends the given dimensions. Hash entries
  # are treated as lookup dimensions (see #select_lookup). Accepts a list or a
  # single Array; the caller's Array is left untouched.
  def group(*dimensions)
    dimensions = dimensions[0] if dimensions.count == 1 && dimensions[0].is_a?(Array)

    @params[:queryType] = GroupBy

    lookups, plain = dimensions.partition { |dimension| dimension.is_a?(Hash) }
    select_lookup(lookups)

    unless plain.empty?
      @params[:dimensions] ||= []
      @params[:dimensions] += plain.map { |dimension| dimension.to_s.strip }
    end
    @params.delete(:metrics)
    self
  end

  # Adds columns to the query. Accepts a list, a single Array, or one
  # comma-separated String like 'sum(column_a) as sum_a, column_b'.
  #
  # * columns combining an aggregate with an operator become post-aggregations
  #   (binary operators between aggregates are written doubled: " ++ ", " // ");
  # * "sum(x)", "max(x)", "min(x)", "count(x)" (any letter case) become aggregations;
  # * Hash entries become lookup dimensions;
  # * everything else is appended to :metrics.
  def select(*columns)
    columns = columns[0].split(", ") if columns.count == 1 && columns[0].is_a?(String) && columns[0][", "]
    columns = columns[0] if columns.count == 1 && columns[0].is_a?(Array)

    return self if columns.all? { |column| column.blank? }

    post_columns, remaining = columns.partition { |column| column.is_a?(String) && column[/(sum|max|min|count).+[\+\-\*\/]/i] }
    @params[:postAggregations] = post_columns.map { |post_column| post_chain(post_column) } unless post_columns.empty?

    method_columns, remaining = remaining.partition { |column| column.is_a?(String) && column[/(sum|max|min|count)\(.+\)/i] }
    method_columns.each { |column| method_column(column) }

    lookups, metrics = remaining.partition { |column| column.is_a?(Hash) }
    select_lookup(lookups)

    unless metrics.empty?
      @params[:metrics] ||= []
      @params[:metrics] += metrics.map { |metric| metric.to_s.strip }
    end
    self
  end

  # Shared implementation of #min, #max and #sum.
  #
  # method  - :min, :max or :sum (Symbol or String).
  # columns - list of column names, or of [column, output_name] pairs. A column
  #           containing " + ", " - ", " * " or " / " is aggregated with a
  #           JavaScript aggregator over the referenced fields.
  def meta_method(method, columns)
    columns  = columns[0] if columns.count == 1 && columns[0].is_a?(Array)
    function = method.to_s

    use_aggregating_query_type
    built = columns.map do |column, naming|
      naming ||= "#{method}(#{column})"
      expression = column.to_s
      if expression[/( [\+\-\*\/] )/]
        # Numeric literals are part of the expression but are not input fields.
        fields = expression.split(/ [\+\-\*\/] /).map(&:strip).reject { |token| numeric?(token) }.uniq
        {
          type: "javascript",
          name: naming,
          fieldNames: fields,
          fnAggregate: "function(#{(['current'] + fields).join(', ')}) { #{FnAggregates[function].gsub('COLUMN', expression)} }",
          # Partial results must be merged with the same operation; adding two
          # partial minimums/maximums would give a wrong result.
          fnCombine: "function(partialA, partialB) { #{FnCombines[function]} }",
          fnReset: "function() { #{FnResets[function]} }"
        }
      else
        { type: "double#{function.capitalize}", name: naming, fieldName: column }
      end
    end
    @params[:aggregations] = ((@params[:aggregations] || []) + built).uniq
    self
  end

  [:min, :max, :sum].each do |method|
    define_method(method) do |*columns|
      meta_method(method, columns)
    end
  end

  # Without arguments adds a row count named "count"; with columns adds one
  # cardinality aggregation per column, named "count(<column>)".
  def count(*columns)
    use_aggregating_query_type
    @params[:aggregations] ||= []
    if columns.empty?
      @params[:aggregations] << { type: "count", name: "count" }
    else
      @params[:aggregations] += columns.map { |column| { type: "cardinality", name: "count(#{column})", fields: [column] } }
    end
    @params[:aggregations].uniq!
    self
  end

  # Adds filters, AND-ed with the filters added by earlier calls.
  #
  # Hash form:   where(begin_date: a, end_date: b, country: "US", id: [1, 2])
  #              :begin_date / :end_date set the query interval, every other
  #              non-blank entry becomes a selector (single value) or "in" filter.
  # String form: where("a = ? and (b > ? or c in ?)", 1, 2, [3, 4])
  #              each " ?" is replaced by the next argument before parsing.
  #
  # Neither the Hash nor the String passed in is modified.
  def where(*conditions)
    first = conditions[0]
    filters =
      if first.is_a?(Hash)
        hash_filters(first)
      elsif first.is_a?(String)
        [where_chain(bind_placeholders(first, conditions.drop(1)))].compact
      end

    unless filters.blank?
      @params[:filter] ||= { type: "and", fields: [] }
      @params[:filter][:fields] += filters
    end
    self
  end

  # Sets the ordering. Accepts column names (ascending) or a Hash/Array of
  # column => direction pairs; a direction containing "desc" sorts descending.
  # A limit set earlier through #limit is kept.
  def order(*columns)
    columns = columns[0] if columns[0].is_a?(Hash) || columns[0].is_a?(Array)

    if @params[:queryType] != GroupBy
      @params[:metric] ||= []
      @params[:metric] += columns.map { |column, _direction| column }
      @params[:descending] = columns.any? { |_column, direction| direction.to_s[/desc/] }
    end

    previous_limit = @params[:limitSpec] && @params[:limitSpec][:limit]
    @params[:limitSpec] = {
      type: "default",
      limit: previous_limit || 500000,
      columns: columns.map { |column, direction|
        {
          dimension: column.to_s,
          direction: direction.to_s[/desc/] ? "descending" : "ascending",
          dimensionOrder: "lexicographic"
        }
      }
    }
    self
  end

  # Limits the number of rows returned (stored in :limitSpec).
  def limit(limit_count)
    @params[:limitSpec] ||= {}
    @params[:limitSpec][:type] ||= "default"
    @params[:limitSpec][:limit] = limit_count
    self
  end

  # Switches to a topN query returning +top_count+ rows. A previous #order is
  # consumed: its first column spec becomes the topN metric.
  def top(top_count)
    @params[:queryType] = TopN
    @params[:threshold] = top_count

    limit_spec = @params.delete(:limitSpec)
    # A limitSpec created by #limit alone has no :columns.
    first_column = limit_spec && Array(limit_spec[:columns]).first
    # NOTE(review): this stores the whole limitSpec column Hash as :metric, as
    # before; confirm the server accepts that shape for a topN metric.
    @params[:metric] = first_column if first_column
    self
  end

  # Selects the page (1-based) of a paged select query. For pages after the
  # first, the preceding pages are queried as needed to obtain their paging
  # identifiers, which are cached per query.
  def page(page_count)
    if page_count == 1
      @params[:pagingSpec] = { pagingIdentifiers: {}, threshold: @threshold }
    elsif page_count > 1
      pages = (@paging_identifiers[paging_key(@params)] ||= { 0 => {} })

      if pages[page_count - 1].nil?
        (1..page_count - 1).each do |current_page|
          next unless pages[current_page].nil?
          query(@params.merge(pagingSpec: { pagingIdentifiers: pages[current_page - 1], threshold: @threshold }), current_page)
        end
      end

      @params[:pagingSpec] = { pagingIdentifiers: pages[page_count - 1], threshold: @threshold }
    end
    self
  end

  # Sets the having clause from a String such as ('a > ? and b = ?', 1, 2).
  # Each " ?" is replaced by the next argument; the String passed in is not
  # modified. Non-String input is ignored.
  def having(*conditions)
    template = conditions[0]
    return self unless template.is_a?(String)

    havings = having_chain(bind_placeholders(template, conditions.drop(1)))
    @params[:having] = havings unless havings.blank?

    self
  end

  # Posts the query and returns the raw response body.
  #
  # params     - query Hash (defaults to the accumulated query); only the
  #              properties permitted for its query type are sent.
  # page_count - when given and the response contains paging identifiers,
  #              they are cached as the identifiers reached after that page.
  def query(params = @params, page_count = nil)
    params = permitted(params)
    ap params if ENV['DEBUG']
    puts params.to_json if ENV['DEBUG']
    result = HTTParty.post(@url, body: params.to_json, headers: { 'Content-Type' => 'application/json' })
    puts result.code if ENV['DEBUG']

    body = result.body
    # Look for the substring in the raw body; indexing the response object
    # with a String fails when the body was parsed into an Array.
    if page_count && body.to_s.include?("pagingIdentifiers")
      # The pagingIdentifiers is something like { "publisher_daily_report_2017-03-01T00:00:00.000Z_2017-03-11T00:00:00.000Z_2017-04-17T21:04:30.804Z" => -10 }
      key = paging_key(params)
      @paging_identifiers[key] ||= {}
      @paging_identifiers[key][page_count] = JSON.parse(body)[0]["result"]["pagingIdentifiers"].transform_values { |value| value + 1 }
    end

    body
  end

  # Switches to a timeBoundary query.
  def time_boundary
    @params[:queryType] = TimeBoundary
    self
  end

  # Switches to a timeBoundary query bound to "maxTime".
  def max_time
    @params[:queryType] = TimeBoundary
    @params[:bound] = "maxTime"
    self
  end

  # Switches to a timeBoundary query bound to "minTime".
  def min_time
    @params[:queryType] = TimeBoundary
    @params[:bound] = "minTime"
    self
  end

  # Switches to a dataSourceMetadata query.
  def source_meta
    @params[:queryType] = DataSourceMetaData
    self
  end

  # Switches to a segmentMetadata query.
  def segment_meta
    @params[:queryType] = SegmentMetaData
    self
  end

  # Runs the query and returns the raw response body.
  def to_s
    query
  end

  # Runs the query and returns the "columns" of the first entry for a
  # segmentMetadata query, otherwise the "events" of the first entry's result.
  # Returns an empty collection when the response has no such data.
  def to_a
    rows  = JSON.parse(query)
    first = rows.is_a?(Array) ? rows.first : nil
    return [] unless first.is_a?(Hash)
    return first["columns"] || {} if @params[:queryType] == SegmentMetaData

    result = first["result"]
    events = result.is_a?(Hash) ? result["events"] : nil
    events || []
  end

  def each(&block)
    to_a.each(&block)
  end

  def map(&block)
    to_a.map(&block)
  end

  # Issues a GET to the configured URL and returns the response body.
  def get
    result = HTTParty.get(@url)
    puts result.code if ENV["DEBUG"]
    result.body
  end

  # Issues a DELETE to the configured URL and returns the response body.
  def delete
    result = HTTParty.delete(@url)
    puts result.code if ENV["DEBUG"]
    result.body
  end

  private

  # Aggregations cannot be sent with a select query; move to timeseries unless
  # a more specific query type (groupBy, topN, ...) was already chosen.
  def use_aggregating_query_type
    type = @params[:queryType]
    @params[:queryType] = TimeSeries if type.nil? || type == Select
  end

  # Returns the subset of +params+ that may be sent for its query type, or a
  # copy of all of them when the type has no entry in Permit_Properties.
  def permitted(params)
    allowed = Permit_Properties[params[:queryType]]
    allowed ? params.slice(*allowed) : params.dup
  end

  # Cache key for paging identifiers: the hash of the query as it is sent,
  # ignoring the pagingSpec. Used by both #page and #query so they agree.
  def paging_key(params)
    permitted(params).reject { |key, _value| key == :pagingSpec }.hash
  end

  # Returns a copy of +template+ with each " ?" replaced by the next value.
  def bind_placeholders(template, values)
    position = -1
    template.gsub(" ?") { " #{values[position += 1]}" }
  end

  # Builds the filter list for the Hash form of #where and applies the
  # interval when :begin_date or :end_date is present.
  def hash_filters(conditions)
    conditions = conditions.dup
    begin_date = conditions.delete(:begin_date)
    end_date   = conditions.delete(:end_date)
    # Leave an interval set by an earlier call alone when no dates are given.
    @params[:intervals] = ["#{begin_date}/#{end_date}"] if begin_date || end_date

    conditions.reject { |_column, value| value.blank? }.map do |column, values|
      if !values.is_a?(Array)
        { type: "selector", dimension: column, value: values }
      elsif values.count == 1
        { type: "selector", dimension: column, value: values[0] }
      else
        { type: "in", dimension: column, values: values }
      end
    end
  end

  # True when +text+ is an integer or decimal literal.
  def numeric?(text)
    !(text.to_s =~ /\A-?\d+(\.\d+)?\z/).nil?
  end

  # True when the "(" at the start of +text+ is closed by the ")" at its end,
  # i.e. the whole text is one parenthesised group ("(a) or (b)" is not).
  def wrapped?(text)
    return false unless text.length >= 2 && text[0] == "(" && text[-1] == ")"

    depth = 0
    text.each_char.with_index do |char, index|
      depth += 1 if char == "("
      depth -= 1 if char == ")"
      return false if depth.zero? && index < text.length - 1
    end
    depth.zero?
  end

  # Splits +conditions+ on " or " (tried first, so it binds loosest) or " and "
  # at a point where the parentheses on both sides are balanced, and returns
  # { type: relation, children_key => [...] } with each side converted by the
  # block. Returns nil when no balanced split exists.
  def split_logical(conditions, children_key, &convert)
    %w(or and).each do |relation|
      mark = " #{relation} "
      next unless conditions[mark]

      parts = conditions.split(mark)
      return { type: relation, children_key => parts.map(&convert) } if check_brackets(parts)

      (parts.length - 2).downto(0) do |i|
        left  = parts[0..i].join(mark)
        right = parts[i + 1..-1].join(mark)
        return { type: relation, children_key => [convert.call(left), convert.call(right)] } if check_brackets(left) && check_brackets(right)
      end
    end
    nil
  end

  # Parses a filter expression into a Druid filter Hash. Supports nested
  # and/or with parentheses and the operators =, >, >=, <, <=, ~ (range given
  # as a JSON pair), regex and in (JSON array). Returns nil when the
  # expression cannot be parsed.
  def where_chain(conditions)
    conditions = conditions[1..-2] while wrapped?(conditions)

    return split_logical(conditions, :fields) { |part| where_chain(part) } if conditions[/ (or|and) /]

    column, op, value = conditions.split(/ (\<|\>|\<\=|\>\=|\=|\~|regex|in) /).map(&:strip)
    case op
    when "="  then { type: "selector", dimension: column, value: value }
    when ">"  then { type: "bound", dimension: column, lower: value, ordering: "numeric" }
    when ">=" then { type: "bound", dimension: column, lower: value, ordering: "numeric", lowerStrict: false }
    when "<"  then { type: "bound", dimension: column, upper: value, ordering: "numeric" }
    when "<=" then { type: "bound", dimension: column, upper: value, ordering: "numeric", upperStrict: false }
    when "~"
      range = JSON.parse(value)
      { type: "bound", dimension: column, lower: range[0], upper: range[1], ordering: "numeric" }
    when "regex" then { type: "regex", dimension: column, pattern: value.delete("\"'") }
    when "in"    then { type: "in", dimension: column, values: JSON.parse(value) }
    end
  end

  # Parses a having expression into a Druid having spec. Supports nested
  # and/or with parentheses and the operators <, > and = (surrounded by
  # spaces). Returns nil when the expression cannot be parsed.
  def having_chain(conditions)
    conditions = conditions[1..-2] while wrapped?(conditions)

    if conditions[/ (or|and) /]
      split_logical(conditions, :havingSpecs) { |part| having_chain(part) }
    elsif conditions[/[\<\>\=]/]
      column, op, value = conditions.split(/( [\<\>\=] )/).map(&:strip)
      { type: OPERATIONS[op], aggregation: column, value: value.to_f }
    end
  end

  # Parses a post-aggregation such as "(sum(a) ++ sum(b)) // 100 as ratio".
  # Binary operators between operands are written doubled (" ++ ", " -- ",
  # " ** ", " // "). Aggregates used as operands are registered as
  # aggregations and referenced by name; numeric literals become constants.
  # Returns nil when no balanced split exists.
  def post_chain(sentences)
    sentences, naming = sentences.split(" as ")
    sentences = sentences[1..-2] while sentences[-2..-1] == "))" && wrapped?(sentences)

    return post_leaf(sentences, naming) unless sentences[/( (\+\+|\-\-|\*\*|\/\/) )/]

    %w(+ - * /).each do |op|
      mark = " #{op * 2} "
      next unless sentences[mark]

      parts = sentences.split(mark)
      (parts.length - 2).downto(0) do |i|
        left  = parts[0..i].join(mark)
        right = parts[i + 1..-1].join(mark)
        return { type: "arithmetic", name: naming, fn: op, fields: [post_chain(left), post_chain(right)] } if check_brackets(left) && check_brackets(right)
      end
    end
    nil
  end

  # Builds the post-aggregator for a single operand of #post_chain.
  def post_leaf(operand, naming)
    literal = operand.strip
    if numeric?(literal)
      { type: "constant", name: naming, value: literal.include?(".") ? literal.to_f : literal.to_i }
    else
      # Refer to the aggregation by the name it was registered under.
      { type: "fieldAccess", name: naming, fieldName: method_column(operand) || operand }
    end
  end

  # Appends one lookup dimension per Hash in +columns+. Each Hash provides
  # :dimension, :output and :name (Symbol or String keys).
  def select_lookup(columns)
    return unless columns.present?

    @params[:dimensions] ||= []
    @params[:dimensions] += columns.map { |lookup|
      {
        type: "lookup",
        dimension: lookup[:dimension] || lookup["dimension"],
        outputName: lookup[:output] || lookup["output"],
        name: lookup[:name] || lookup["name"],
        retainMissingValue: true,
      }
    }
  end

  # Registers the aggregation described by a column such as "SUM(price)" and
  # returns the name it is registered under ("sum(price)"), or nil when the
  # column contains no sum/max/min/count call.
  #
  # Only the function name in call position is removed, so column names that
  # merely contain "sum", "min", "max" or "count" (e.g. "account") stay intact.
  # NOTE(review): an " as alias" suffix is discarded, as before.
  def method_column(column)
    expression = column.split(" as ")[0].to_s
    method = expression[/\b(sum|max|min|count)\s*\(/i, 1]
    return nil unless method

    method = method.downcase
    field  = expression.gsub(/\b#{method}\s*(?=\()/i, "").delete("()").strip

    # Add the column to aggregations, which name is like sum(column), min(column), max(column), count(column)
    send(method, field)
    "#{method}(#{field})"
  end

  # True when every given string (or string in the given arrays) has as many
  # "(" as ")".
  def check_brackets(*sentences)
    sentences.flatten.all? { |sentence| sentence.scan("(").count == sentence.scan(")").count }
  end
end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: claw_druid
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Fan Jieqi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-05-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '12.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '12.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: httparty
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.14.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.14.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: json
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.7'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: awesome_print
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.7'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.7'
69
+ - !ruby/object:Gem::Dependency
70
+ name: activesupport
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 4.2.3
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 4.2.3
83
+ description: The ruby client of Druid.
84
+ email: fanjieqi@nibirutech.com
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files: []
88
+ files:
89
+ - lib/array.rb
90
+ - lib/claw_druid.rb
91
+ homepage: http://galileo.tap4fun.com/fanjieqi/claw_druid
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.6.11
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: The ruby client of Druid.
115
+ test_files: []