dataoperations-aggregate 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dataoperations-aggregate.rb +87 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 88229d7d1a3eb43c59f593c15ed5350189f534927d849b8bb75e70583610bdb6
+  data.tar.gz: 812bea576d59f4211e659c6e6da8f37cdabbac7a01faadc9e8f09e3512f0ea7b
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: dabc7016ecefa788042959f1184b258fe70d2468ed5e72e9a675902c1f2f53941559fe5e5083887f131662daf65e4f6339bf788269509f41da3093dd090b68e2
+  data.tar.gz: d0da00e819b18b73d0a942e2b8b871af276300936043cb09ebec48e0277bd1eb283ffbc57fdee3e73c1d400aae0149d7cfc9634dc45bf7533a9f68daa18f730f
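The SHA256 values above can be checked against a locally downloaded copy of the gem. A .gem file is a tar archive whose members include the metadata.gz and data.tar.gz entries that checksums.yaml covers, so a minimal verification sketch looks like the following (the local filename follows the usual RubyGems naming convention and is an assumption, not something stated in this diff):

    require 'digest'
    require 'rubygems/package'

    # Hypothetical local file, e.g. from `gem fetch dataoperations-aggregate -v 0.0.7`
    gem_file = 'dataoperations-aggregate-0.0.7.gem'

    # Hash the two archive members listed in checksums.yaml.
    Gem::Package::TarReader.new(File.open(gem_file, 'rb')).each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
      puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
    end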
data/lib/dataoperations-aggregate.rb
CHANGED
@@ -9,9 +9,10 @@ module DataOperations
     DEFAULT_INTERVALS = [10].freeze
     DEFAULT_FLUSH_INTERVAL = 5
     DEFAULT_PROCESSING_MODE = :batch
+    DEFAULT_TIME_STARTED_MODE = :first_message
     DEFAULT_FIELD_NO_DATA_VALUE = 'no_data'.freeze
     DEFAULT_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
-    VALID_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
+    VALID_AGGREGATIONS = %w[sum min max mean median variance standard_deviation bucket].freeze
     DEFAULT_HASH_TIME_FORMAT = '%Y-%m-%dT%H'.freeze
     DEFAULT_INERVAL_SECONDS = 3600
 
@@ -24,11 +25,14 @@ module DataOperations
                    keep_interval: DEFAULT_KEEP_INTERVAL,
                    field_no_data_value: DEFAULT_FIELD_NO_DATA_VALUE,
                    processing_mode: DEFAULT_PROCESSING_MODE,
+                   time_started_mode: DEFAULT_TIME_STARTED_MODE,
                    aggregator_name: nil,
                    log: Logger.new(STDOUT),
                    aggregation_names:,
                    group_field_names:,
-                   aggregate_field_names
+                   aggregate_field_names:,
+                   buckets:[],
+                   bucket_metrics:[]
                   )
       @aggregator = aggregator
       @time_format = time_format
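With these constructor changes, callers can pass the new time_started_mode, buckets, and bucket_metrics options alongside the existing keyword arguments. A minimal usage sketch, assuming the class is DataOperations::Aggregate (the class name and all field values below are illustrative, not taken from this diff):

    require 'dataoperations-aggregate'

    aggregator = DataOperations::Aggregate.new(
      aggregation_names:     %w[mean max bucket],   # 'bucket' is newly allowed in 0.0.7
      group_field_names:     %w[host],              # hypothetical grouping field
      aggregate_field_names: %w[response_time],     # hypothetical numeric field
      time_started_mode:     :first_message,        # new option; default shown above
      buckets:               [100, 500, 1000],      # hypothetical bucket boundaries
      bucket_metrics:        %w[response_time]      # fields to bucket
    )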
@@ -39,7 +43,10 @@ module DataOperations
       @keep_interval = keep_interval
       @field_no_data_value = field_no_data_value
       @processing_mode = processing_mode
+      @time_started_mode = time_started_mode
       @aggregator_name = aggregator_name
+      @buckets = buckets
+      @bucket_metrics = bucket_metrics
 
 
       if aggregation_names.nil? || !aggregation_names.is_a?(Array)
@@ -117,14 +124,14 @@ module DataOperations
         aggregator_item['aggregate_fields'] = aggregate_detail
         aggregator_item['intervals'] = interval_detail
 
-
+        @aggregator_mutex.synchronize {@aggregator[hash_group_key] = aggregator_item}
       end
 
       if !aggregator_item['aggregate_fields'].key?(aggregator_hash_key)
         hash_aggregator = {}
         hash_aggregator[:time_started] = Time.now.to_i
         hash_aggregator['processed'] = 1
-
+        @aggregator_mutex.synchronize {aggregator_item['aggregate_fields'][aggregator_hash_key] = hash_aggregator}
       else
         aggregator_item['aggregate_fields'][aggregator_hash_key]['processed'] += 1
       end
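The two added synchronize calls wrap writes to the shared aggregation state in a mutex, the usual Ruby pattern when one thread accumulates into a hash that another thread (for example a periodic flush) reads. A generic sketch of that pattern, with illustrative names rather than the gem's own:

    store = {}
    lock  = Mutex.new

    # Writer thread: mutate the shared hash only while holding the lock.
    lock.synchronize { store['group-a'] = { 'processed' => 1 } }

    # Reader thread (e.g. a flush timer): take the same lock before iterating.
    lock.synchronize { store.each { |key, item| puts "#{key}: #{item['processed']}" } }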
@@ -216,16 +223,51 @@ module DataOperations
           # Aggregate data
           if aggregate_field_value.is_a?(Array)
             @aggregation_names.each do |operation|
-
-
-
-
-
-
-
-
+
+              #If bucket, calculate bucket for metric
+              if operation == 'bucket'
+                #If set buckets and set metrics to calculate (bucket values depends of ranges based in metric activity)
+                if !@buckets.nil? && !@bucket_metrics.nil? && @bucket_metrics.include?(aggregate_field_key)
+                  data_bucket = calculate_buckets(aggregate_field_value, @buckets)
+
+                  data_bucket.each {|bucket,bucket_count|
+                    #@log.info("#{aggregate_field_key}_#{bucket} = #{bucket_count}")
+                    aggregator_data["#{aggregate_field_key}_bucket#{bucket}"] = bucket_count
+                  }
+
+                  # Add aggregated data to interval
+                  group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+
+                    interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+                    interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+                    data_bucket.each {|bucket,bucket_count|
+                      #@log.info("#{aggregate_field_key}_#{bucket} = #{bucket_count}")
+                      interval_aggregator_item_value['aggregate_fields'][aggregate_field_key]["bucket#{bucket}"] = [] if interval_aggregator_item_value['aggregate_fields'][aggregate_field_key]["bucket#{bucket}"].nil?
+                      interval_aggregator_item_value['aggregate_fields'][aggregate_field_key]["bucket#{bucket}"] << bucket_count
+                    }
+
+                  end
+
+                end
+              else
+                data = aggregate_field_value.method(operation).call
+                aggregator_data["#{aggregate_field_key}_#{operation}"] = data
+
+                # Add aggregated data to interval
+                group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+                  interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+                  interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+                  interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] << data
+                end
               end
+
+
+            end
+
+            if !@buckets.nil? && ! @bucket_metrics.nil?
+              #data = calculate_buckets(data, @buckets)
             end
+
           end
         end
 
@@ -241,6 +283,9 @@ module DataOperations
           @aggregation_names.each do |operation|
             interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] = []
           end
+
+          ##Add buckets metadata (empty hash)
+          ##interval_aggregator_item_value['aggregate_fields'][aggregate_field_key]['buckets']={}
         end
       end
 
@@ -249,7 +294,9 @@ module DataOperations
         # @log.debug "interval_aggregator_item_key: #{interval_aggregator_item_key}"
 
         if interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
-          if
+          if @time_started_mode == :first_event && aggregator_item_value[:time_started] < interval_aggregator_item_value[:time_started]
+            interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
+          elseif @time_started_mode == :last_event && aggregator_item_value[:time_started] > interval_aggregator_item_value[:time_started]
            interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
          end
          interval_aggregator_item_value['processed'] += aggregator_item_value['processed']
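Two details in this hunk are worth noting: the default added above is :first_message while the comparisons here test :first_event and :last_event, and `elseif` is not a Ruby keyword (the keyword is `elsif`), so the second branch is parsed as an ordinary method call rather than a conditional. A corrected sketch of the apparent intent, using `elsif` and otherwise keeping the comparisons as shown:

    # Keep the earliest or latest start time for the interval, depending on mode.
    if @time_started_mode == :first_event &&
       aggregator_item_value[:time_started] < interval_aggregator_item_value[:time_started]
      interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
    elsif @time_started_mode == :last_event &&
          aggregator_item_value[:time_started] > interval_aggregator_item_value[:time_started]
      interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
    end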
@@ -292,10 +339,17 @@ module DataOperations
             case operation
             when 'max', 'min', 'mean', 'median'
               data = vector.method(operation).call
+            when 'bucket'
+              #Bucket operation generate bucket[\d]+
+              data = nil
+            when /^bucket[\d]+/
+              #For buckets sum accumulations for internvals
+              data = vector.method('sum').call
             else
               data = vector.median
             end
-
+            #Nil data is avoid (for example for 'bucket' name operation)
+            aggregator_data["#{field_name}_#{operation}"] = data unless data.nil?
           end
         end
         # @log.debug aggregator_item_value
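The per-event pass above stores bucket counts under operation-style keys such as bucket100 and emits output fields named "#{aggregate_field_key}_bucket#{bucket}"; this interval roll-up then matches those keys with the new /^bucket[\d]+/ branch and sums them, while the literal 'bucket' operation itself produces no direct output (data = nil). A small illustration of that dispatch with made-up operation names:

    ['max', 'bucket', 'bucket100'].each do |operation|
      case operation
      when 'max', 'min', 'mean', 'median' then puts "#{operation}: statistical reduce"
      when 'bucket'                       then puts "#{operation}: no direct output (data = nil)"
      when /^bucket[\d]+/                 then puts "#{operation}: counts summed across the interval"
      end
    end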
@@ -305,5 +359,24 @@ module DataOperations
         aggregate_data[s_interval] << aggregator_data
       end
     end
+
+    #Return Array with count by each bucket
+    def calculate_buckets(data, buckets_config)
+      buckets_config.sort!.uniq!
+      buckets = {}
+
+      buckets_config.each {|bucket| buckets[bucket] = 0}
+
+      data.each {|item|
+        buckets_config.each {|bucket|
+          if item <= bucket
+            buckets[bucket] += 1
+            next
+          end
+        }
+      }
+      return buckets
+    end
+
   end
 end
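Despite its comment, calculate_buckets returns a Hash keyed by bucket boundary, and because `next` only ends the inner block iteration, each item is counted in every boundary it is less than or equal to, so the counts are cumulative (histogram-style). A worked example with made-up samples and boundaries, restating the loop above:

    data    = [5, 50, 200]      # hypothetical metric samples
    buckets = [10, 100, 1000]   # hypothetical boundaries

    counts = buckets.map { |b| [b, 0] }.to_h
    data.each { |item| buckets.each { |b| counts[b] += 1 if item <= b } }

    counts  # => {10=>1, 100=>2, 1000=>3}, matching calculate_buckets(data, buckets)

Each boundary's count is then emitted under a key of the form <field>_bucket<boundary>, per the interpolation in the aggregation hunk above.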
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dataoperations-aggregate
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.7
 platform: ruby
 authors:
 - Victor Guillen
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-04-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: descriptive_statistics
@@ -50,7 +50,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.3
+rubygems_version: 3.0.3.1
 signing_key:
 specification_version: 4
 summary: Aggregate data