dataoperations-aggregate 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/dataoperations-aggregate.rb +87 -14
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 641acf44ae266d4790320ae506cc968b20fe9fdcbc63b7fb50b0cb41c13a77b6
4
- data.tar.gz: adeed75be0d03687fa1700e3ee5c8048885a904b0e8ad5667969328e8a4472a0
3
+ metadata.gz: 88229d7d1a3eb43c59f593c15ed5350189f534927d849b8bb75e70583610bdb6
4
+ data.tar.gz: 812bea576d59f4211e659c6e6da8f37cdabbac7a01faadc9e8f09e3512f0ea7b
5
5
  SHA512:
6
- metadata.gz: 3b7f039bc8f7046051a8150576eb6a29ba10edb6287251bd0980a211eecec41b69bcda10dde9cf8d920f5dcf07c7be1752b2b211dda5419f7fd7c5686cfd0195
7
- data.tar.gz: 12f5981b5fc696c4cb0b36a68dd38a5ad814085bf8f3aded30cfb2e2446607f0971a6adc9220cb29f3615ca44a9673c8dcac18f8d825bd1a28755bdb4e92a3cd
6
+ metadata.gz: dabc7016ecefa788042959f1184b258fe70d2468ed5e72e9a675902c1f2f53941559fe5e5083887f131662daf65e4f6339bf788269509f41da3093dd090b68e2
7
+ data.tar.gz: d0da00e819b18b73d0a942e2b8b871af276300936043cb09ebec48e0277bd1eb283ffbc57fdee3e73c1d400aae0149d7cfc9634dc45bf7533a9f68daa18f730f
@@ -9,9 +9,10 @@ module DataOperations
9
9
  DEFAULT_INTERVALS = [10].freeze
10
10
  DEFAULT_FLUSH_INTERVAL = 5
11
11
  DEFAULT_PROCESSING_MODE = :batch
12
+ DEFAULT_TIME_STARTED_MODE = :first_message
12
13
  DEFAULT_FIELD_NO_DATA_VALUE = 'no_data'.freeze
13
14
  DEFAULT_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
14
- VALID_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
15
+ VALID_AGGREGATIONS = %w[sum min max mean median variance standard_deviation bucket].freeze
15
16
  DEFAULT_HASH_TIME_FORMAT = '%Y-%m-%dT%H'.freeze
16
17
  DEFAULT_INERVAL_SECONDS = 3600
17
18
 
@@ -24,11 +25,14 @@ module DataOperations
24
25
  keep_interval: DEFAULT_KEEP_INTERVAL,
25
26
  field_no_data_value: DEFAULT_FIELD_NO_DATA_VALUE,
26
27
  processing_mode: DEFAULT_PROCESSING_MODE,
28
+ time_started_mode: DEFAULT_TIME_STARTED_MODE,
27
29
  aggregator_name: nil,
28
30
  log: Logger.new(STDOUT),
29
31
  aggregation_names:,
30
32
  group_field_names:,
31
- aggregate_field_names:
33
+ aggregate_field_names:,
34
+ buckets:[],
35
+ bucket_metrics:[]
32
36
  )
33
37
  @aggregator = aggregator
34
38
  @time_format = time_format
@@ -39,7 +43,10 @@ module DataOperations
39
43
  @keep_interval = keep_interval
40
44
  @field_no_data_value = field_no_data_value
41
45
  @processing_mode = processing_mode
46
+ @time_started_mode = time_started_mode
42
47
  @aggregator_name = aggregator_name
48
+ @buckets = buckets
49
+ @bucket_metrics = bucket_metrics
43
50
 
44
51
 
45
52
  if aggregation_names.nil? || !aggregation_names.is_a?(Array)
@@ -117,14 +124,14 @@ module DataOperations
117
124
  aggregator_item['aggregate_fields'] = aggregate_detail
118
125
  aggregator_item['intervals'] = interval_detail
119
126
 
120
- @aggregator_mutex.synchronize {@aggregator[hash_group_key] = aggregator_item}
127
+ @aggregator_mutex.synchronize {@aggregator[hash_group_key] = aggregator_item}
121
128
  end
122
129
 
123
130
  if !aggregator_item['aggregate_fields'].key?(aggregator_hash_key)
124
131
  hash_aggregator = {}
125
132
  hash_aggregator[:time_started] = Time.now.to_i
126
133
  hash_aggregator['processed'] = 1
127
- @aggregator_mutex.synchronize {aggregator_item['aggregate_fields'][aggregator_hash_key] = hash_aggregator}
134
+ @aggregator_mutex.synchronize {aggregator_item['aggregate_fields'][aggregator_hash_key] = hash_aggregator}
128
135
  else
129
136
  aggregator_item['aggregate_fields'][aggregator_hash_key]['processed'] += 1
130
137
  end
@@ -216,16 +223,51 @@ module DataOperations
216
223
  # Aggregate data
217
224
  if aggregate_field_value.is_a?(Array)
218
225
  @aggregation_names.each do |operation|
219
- data = aggregate_field_value.method(operation).call
220
- aggregator_data["#{aggregate_field_key}_#{operation}"] = data
221
-
222
- # Add aggregated data to interval
223
- group_item_value['intervals'].keys[1..-1].each do |interval_secs|
224
- interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
225
- interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
226
- interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] << data
226
+
227
+ #If bucket, calculate bucket for metric
228
+ if operation == 'bucket'
229
+ #If buckets and metrics to calculate are set (bucket values depend on ranges based on metric activity)
230
+ if !@buckets.nil? && !@bucket_metrics.nil? && @bucket_metrics.include?(aggregate_field_key)
231
+ data_bucket = calculate_buckets(aggregate_field_value, @buckets)
232
+
233
+ data_bucket.each {|bucket,bucket_count|
234
+ #@log.info("#{aggregate_field_key}_#{bucket} = #{bucket_count}")
235
+ aggregator_data["#{aggregate_field_key}_bucket#{bucket}"] = bucket_count
236
+ }
237
+
238
+ # Add aggregated data to interval
239
+ group_item_value['intervals'].keys[1..-1].each do |interval_secs|
240
+
241
+ interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
242
+ interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
243
+ data_bucket.each {|bucket,bucket_count|
244
+ #@log.info("#{aggregate_field_key}_#{bucket} = #{bucket_count}")
245
+ interval_aggregator_item_value['aggregate_fields'][aggregate_field_key]["bucket#{bucket}"] = [] if interval_aggregator_item_value['aggregate_fields'][aggregate_field_key]["bucket#{bucket}"].nil?
246
+ interval_aggregator_item_value['aggregate_fields'][aggregate_field_key]["bucket#{bucket}"] << bucket_count
247
+ }
248
+
249
+ end
250
+
251
+ end
252
+ else
253
+ data = aggregate_field_value.method(operation).call
254
+ aggregator_data["#{aggregate_field_key}_#{operation}"] = data
255
+
256
+ # Add aggregated data to interval
257
+ group_item_value['intervals'].keys[1..-1].each do |interval_secs|
258
+ interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
259
+ interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
260
+ interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] << data
261
+ end
227
262
  end
263
+
264
+
265
+ end
266
+
267
+ if !@buckets.nil? && ! @bucket_metrics.nil?
268
+ #data = calculate_buckets(data, @buckets)
228
269
  end
270
+
229
271
  end
230
272
  end
231
273
 
@@ -241,6 +283,9 @@ module DataOperations
241
283
  @aggregation_names.each do |operation|
242
284
  interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] = []
243
285
  end
286
+
287
+ ##Add buckets metadata (empty hash)
288
+ ##interval_aggregator_item_value['aggregate_fields'][aggregate_field_key]['buckets']={}
244
289
  end
245
290
  end
246
291
 
@@ -249,7 +294,9 @@ module DataOperations
249
294
  # @log.debug "interval_aggregator_item_key: #{interval_aggregator_item_key}"
250
295
 
251
296
  if interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
252
- if interval_aggregator_item_value[:time_started] < aggregator_item_value[:time_started]
297
+ if @time_started_mode == :first_event && aggregator_item_value[:time_started] < interval_aggregator_item_value[:time_started]
298
+ interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
299
+ elseif @time_started_mode == :last_event && aggregator_item_value[:time_started] > interval_aggregator_item_value[:time_started]
253
300
  interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
254
301
  end
255
302
  interval_aggregator_item_value['processed'] += aggregator_item_value['processed']
@@ -292,10 +339,17 @@ module DataOperations
292
339
  case operation
293
340
  when 'max', 'min', 'mean', 'median'
294
341
  data = vector.method(operation).call
342
+ when 'bucket'
343
+ #Bucket operation generates bucket[\d]+
344
+ data = nil
345
+ when /^bucket[\d]+/
346
+ #For buckets, sum accumulations for intervals
347
+ data = vector.method('sum').call
295
348
  else
296
349
  data = vector.median
297
350
  end
298
- aggregator_data["#{field_name}_#{operation}"] = data
351
+ #Nil data is skipped (for example for the 'bucket' operation name)
352
+ aggregator_data["#{field_name}_#{operation}"] = data unless data.nil?
299
353
  end
300
354
  end
301
355
  # @log.debug aggregator_item_value
@@ -305,5 +359,24 @@ module DataOperations
305
359
  aggregate_data[s_interval] << aggregator_data
306
360
  end
307
361
  end
362
+
363
+ #Return Hash with the count for each bucket
364
+ def calculate_buckets(data, buckets_config)
365
+ buckets_config.sort!.uniq!
366
+ buckets = {}
367
+
368
+ buckets_config.each {|bucket| buckets[bucket] = 0}
369
+
370
+ data.each {|item|
371
+ buckets_config.each {|bucket|
372
+ if item <= bucket
373
+ buckets[bucket] += 1
374
+ next
375
+ end
376
+ }
377
+ }
378
+ return buckets
379
+ end
380
+
308
381
  end
309
382
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dataoperations-aggregate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Guillen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-26 00:00:00.000000000 Z
11
+ date: 2022-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: descriptive_statistics
@@ -50,7 +50,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
50
50
  - !ruby/object:Gem::Version
51
51
  version: '0'
52
52
  requirements: []
53
- rubygems_version: 3.0.3
53
+ rubygems_version: 3.0.3.1
54
54
  signing_key:
55
55
  specification_version: 4
56
56
  summary: Aggregate data