dataoperations-aggregate 0.0.2 → 0.0.3

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/dataoperations-aggregate.rb +280 -252
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: cb5ea4c24213f92e05bb0061911b37aa6b5732c71e5ceb3b693271bb4e9ac853
-  data.tar.gz: 946a56b993434a1c42e8f7a144ac4b1c25bfe52bc733d4eab3512946b183f45a
+  metadata.gz: af4af69996bbbaf33b749eded827f3d6f695e44bfcd1be0bda98ed8e027cd0f1
+  data.tar.gz: 94d67f60c8ff46462c84a9d0a186c6d240d99ca533d4db624cef97895efc5050
 SHA512:
-  metadata.gz: cad19717f64525e8e0ff0f0dcc7805d29131a40230db71a9da422e44ea2c94b820ad1450600e1295a6ff6c53f4d2a59da501bed79e2d1618f27d590a724dbd80
-  data.tar.gz: d5c0c95610cd59fae0dd38784a465e79d5ad542ff2a12d4453fe026fa7923796ab64112a012e1a10119ac519b452256756e50e816a289ee98f1fb8776a7d39d5
+  metadata.gz: a4cfe7be327d99b497d134d3ea3fad255aa46a8bf5b0153f96e5d2cd49bcc41ff444ada6b0cc08d2c0f2a398ff90f447f1959f7945dc3e0b8f506c1a90321c09
+  data.tar.gz: 754c6c3920f28c29d9c03dbd7a1c5a00fbd71f560f72e8e000dca1b7eb68283c61ca85e6bfde1a689d68dd1d2953d593549aa0ec72dc37573feddd3320ffce1b
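
Both digest sets change whenever a release is rebuilt, since they are computed over the gem's metadata.gz and data.tar.gz archives. As a minimal verification sketch using only Ruby's standard library (the archive paths are hypothetical and assume the downloaded .gem has already been unpacked into the current directory):

  require 'digest'

  # Hypothetical paths: metadata.gz and data.tar.gz as unpacked from the .gem
  %w[metadata.gz data.tar.gz].each do |archive|
    puts "#{archive}: #{Digest::SHA256.file(archive).hexdigest}"
  end
  # For 0.0.3 the printed digests should match the two new SHA256 values above.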
data/lib/dataoperations-aggregate.rb CHANGED
@@ -3,277 +3,305 @@ require 'time'
 require 'descriptive_statistics'
 module DataOperations
   class Aggregate
-    DEFAULT_TIME_FORMAT='%Y-%m-%dT%H:%M:%S.%L%:z'
-    DEFAULT_TIME_FIELD='timestamp'
-    DEFAULT_OUTPUT_TIME_FORMAT='%Y-%m-%dT%H:%M:%S.%L%z'
-    DEFAULT_INTERVALS=[10]
-    DEFAULT_FLUSH_INTERVAL=5
-    DEFAULT_PROCESSING_MODE=:batch
-    DEFAULT_FIELD_NO_DATA_VALUE='no_data'
-    DEFAULT_AGGREGATIONS=['sum','min','max','mean','median','variance','standard_deviation']
-    VALID_AGGREGATIONS = ['sum','min','max','mean','median','variance','standard_deviation']
-    DEFAULT_HASH_TIME_FORMAT = '%Y-%m-%dT%H'
-    DEFAULT_INERVAL_SECONDS = 3600
-
-    def initialize(aggregator: {},
-                   time_format: DEFAULT_TIME_FORMAT,
-                   time_field: DEFAULT_TIME_FIELD,
-                   output_time_format: DEFAULT_OUTPUT_TIME_FORMAT,
-                   intervals:DEFAULT_INTERVALS,
-                   flush_interval:DEFAULT_FLUSH_INTERVAL,
-                   keep_interval:DEFAULT_KEEP_INTERVAL,
-                   field_no_data_value:DEFAULT_FIELD_NO_DATA_VALUE,
-                   processing_mode:DEFAULT_PROCESSING_MODE,
-                   aggregator_name:nil,
-                   log:Logger.new(STDOUT),
-                   aggregation_names:,
-                   group_field_names:,
-                   aggregate_field_names:
-                  )
-      @aggregator = aggregator
-      @time_format = time_format
-      @time_field = time_field
-      @output_time_format = output_time_format
-      @intervals = intervals.uniq.sort!
-      @flush_interval = flush_interval
-      @keep_interval = keep_interval
-      @field_no_data_value = field_no_data_value
-      @processing_mode = processing_mode
-      @aggregator_name = aggregator_name
-
-
-      if aggregation_names.nil? || ! aggregation_names.is_a?(Array)
-        raise "Configuration error, aggregation_names must be specified and Array"
-      end
-      if group_field_names.nil? || ! aggregation_names.is_a?(Array)
-        raise "Configuration error, group_field_names must be specified and Array"
-      end
-      if aggregate_field_names.nil? || ! aggregation_names.is_a?(Array)
-        raise "Configuration error, aggregate_field_names must be specified and Array"
-      end
+    DEFAULT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%L%:z'.freeze
+    DEFAULT_TIME_FIELD = 'timestamp'.freeze
+    DEFAULT_OUTPUT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%L%z'.freeze
+    DEFAULT_INTERVALS = [10].freeze
+    DEFAULT_FLUSH_INTERVAL = 5
+    DEFAULT_PROCESSING_MODE = :batch
+    DEFAULT_FIELD_NO_DATA_VALUE = 'no_data'.freeze
+    DEFAULT_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
+    VALID_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
+    DEFAULT_HASH_TIME_FORMAT = '%Y-%m-%dT%H'.freeze
+    DEFAULT_INERVAL_SECONDS = 3600
+
+    def initialize(aggregator: {},
+                   time_format: DEFAULT_TIME_FORMAT,
+                   time_field: DEFAULT_TIME_FIELD,
+                   output_time_format: DEFAULT_OUTPUT_TIME_FORMAT,
+                   intervals: DEFAULT_INTERVALS,
+                   flush_interval: DEFAULT_FLUSH_INTERVAL,
+                   keep_interval: DEFAULT_KEEP_INTERVAL,
+                   field_no_data_value: DEFAULT_FIELD_NO_DATA_VALUE,
+                   processing_mode: DEFAULT_PROCESSING_MODE,
+                   aggregator_name: nil,
+                   log: Logger.new(STDOUT),
+                   aggregation_names:,
+                   group_field_names:,
+                   aggregate_field_names:
+                  )
+      @aggregator = aggregator
+      @time_format = time_format
+      @time_field = time_field
+      @output_time_format = output_time_format
+      @intervals = intervals.uniq.sort!
+      @flush_interval = flush_interval
+      @keep_interval = keep_interval
+      @field_no_data_value = field_no_data_value
+      @processing_mode = processing_mode
+      @aggregator_name = aggregator_name
 
-      @log = log
 
-      @hash_time_format = DEFAULT_HASH_TIME_FORMAT
-      @interval_seconds = DEFAULT_INERVAL_SECONDS
+      if aggregation_names.nil? || !aggregation_names.is_a?(Array)
+        raise 'Configuration error, aggregation_names must be specified and Array'
+      end
+      if group_field_names.nil? || !aggregation_names.is_a?(Array)
+        raise 'Configuration error, group_field_names must be specified and Array'
+      end
+      if aggregate_field_names.nil? || !aggregation_names.is_a?(Array)
+        raise 'Configuration error, aggregate_field_names must be specified and Array'
+      end
 
-      @aggregation_names = aggregation_names
-      @group_field_names = group_field_names
-      @aggregate_field_names = aggregate_field_names
+      @log = log
 
-      @aggregation_names.each {|operation|
-        if ! VALID_AGGREGATIONS.include?(operation)
-          raise "aggregations must set any combination of sum,min,max,mean,median,variance,standard_deviation"
-        end
-      }
-      @intervals.each {|interval|
-        if ! (interval % @intervals[0] == 0)
-          raise "interval: #{interval} must be multiple of first interval: #{@intervals[0]}"
-        end
-      }
+      @hash_time_format = DEFAULT_HASH_TIME_FORMAT
+      @interval_seconds = DEFAULT_INERVAL_SECONDS
 
-      #TODO:
-      # - Duplicate intervals - Done
-      # - Sort intervals - Done
-      # - Validate aggregation_names, group_field_names, aggregate_field_names
+      @aggregation_names = aggregation_names
+      @group_field_names = group_field_names
+      @aggregate_field_names = aggregate_field_names
+
+      @aggregation_names.each do |operation|
+        unless VALID_AGGREGATIONS.include?(operation)
+          raise 'aggregations must set any combination of sum,min,max,mean,median,variance,standard_deviation'
+        end
+      end
+      @intervals.each do |interval|
+        unless (interval % @intervals[0]).zero?
+          raise "interval: #{interval} must be multiple of first interval: #{@intervals[0]}"
+        end
       end
 
-    def log_level(log_level)
-      @log.level = log_level
+      # TODO:
+      # - Duplicate intervals - Done
+      # - Sort intervals - Done
+      # - Validate aggregation_names, group_field_names, aggregate_field_names
+    end
+
+    def log_level(log_level)
+      @log.level = log_level
+    end
+
+    def add_events(record)
+      timestamp = nil
+      if !record.key?(@time_field) || !(timestamp = DateTime.strptime(record[@time_field], @time_format).to_time.to_i)
+        timestamp = DateTime.now.to_time.to_i
       end
 
-    def add_events(record)
-      timestamp = nil
-      if ! record.has_key?(@time_field) || ! (timestamp = DateTime.strptime(record[@time_field],@time_format).to_time.to_i)
-        timestamp = DateTime.now.to_time.to_i
-      end
+      current_interval_seconds = (timestamp / @intervals[0]) * @intervals[0]
+      aggregator_hash_key = current_interval_seconds
 
-      current_interval_seconds = (timestamp / @intervals[0]) * @intervals[0]
-      aggregator_hash_key = current_interval_seconds
+      hash_group_key = nil
+      @group_field_names.each do |field_name|
+        hash_group_key = !hash_group_key.nil? ? "#{hash_group_key}_#{field_name}:#{record[field_name]}" : "#{field_name}:#{record[field_name]}"
+      end
 
-      hash_group_key = nil
-      @group_field_names.each {|field_name|
-        if ! hash_group_key.nil?
-          hash_group_key = "#{hash_group_key}_#{field_name}:#{record[field_name]}"
-        else
-          hash_group_key = "#{field_name}:#{record[field_name]}"
-        end
-      }
-
-      aggregator_item={}
-      if @aggregator.has_key?(hash_group_key)
-        aggregator_item = @aggregator[hash_group_key]
-      else
-        group_detail = {}
-        aggregate_detail = {}
-        interval_detail = {}
-        @group_field_names.each {|field_name|
-          if record.has_key?(field_name)
-            group_detail[field_name] = record[field_name]
-          else
-            group_detail[field_name] = @field_no_data_value
-          end
-        }
-
-        #Add interval empty data
-        @intervals.each{|interval|
-          interval_detail[interval.to_s]={}
-        }
-
-        aggregator_item["group_fields"]=group_detail
-        aggregator_item["aggregate_fields"]=aggregate_detail
-        aggregator_item["intervals"]=interval_detail
-
-        @aggregator[hash_group_key]=aggregator_item
-      end
+      aggregator_item = {}
+      if @aggregator.key?(hash_group_key)
+        aggregator_item = @aggregator[hash_group_key]
+      else
+        group_detail = {}
+        aggregate_detail = {}
+        interval_detail = {}
+        @group_field_names.each do |field_name|
+          group_detail[field_name] = record.key?(field_name) ? record[field_name] : @field_no_data_value
+        end
+
+        # Add interval empty data
+        @intervals.each do |interval|
+          interval_detail[interval.to_s] = {}
+        end
+
+        aggregator_item['group_fields'] = group_detail
+        aggregator_item['aggregate_fields'] = aggregate_detail
+        aggregator_item['intervals'] = interval_detail
+
+        @aggregator[hash_group_key] = aggregator_item
+      end
+
+      if !aggregator_item['aggregate_fields'].key?(aggregator_hash_key)
+        hash_aggregator = {}
+        hash_aggregator[:time_started] = Time.now.to_i
+        hash_aggregator['processed'] = 1
+        aggregator_item['aggregate_fields'][aggregator_hash_key] = hash_aggregator
+      else
+        aggregator_item['aggregate_fields'][aggregator_hash_key]['processed'] += 1
+      end
+
+      @aggregate_field_names.each do |field_name|
+        aggregate_values = []
+        if aggregator_item['aggregate_fields'][aggregator_hash_key].key?(field_name)
+          aggregate_values = aggregator_item['aggregate_fields'][aggregator_hash_key][field_name]
+        end
+        if record[field_name].is_a?(Integer) || record[field_name].is_a?(Float)
+          aggregate_values << record[field_name]
+        else
+          aggregate_values << 0
+        end
+        aggregator_item['aggregate_fields'][aggregator_hash_key][field_name] = aggregate_values
+      end
+    end
+
+    def aggregate_data
+      @aggregator
+    end
 
-      if ! aggregator_item["aggregate_fields"].has_key?(aggregator_hash_key)
-        hash_aggregator = {}
-        hash_aggregator[:time_started]=Time.now.to_i
-        hash_aggregator["processed"]=1
-        aggregator_item["aggregate_fields"][aggregator_hash_key]=hash_aggregator
-      else
-        aggregator_item["aggregate_fields"][aggregator_hash_key]["processed"]+=1
+    def aggregate_events
+      aggregate_data = {}
+
+      # @log.debug @aggregator
+      # @aggregator_mutex.synchronize do
+      current_time = Time.now.to_i
+      @aggregator.each do |group_item_key, group_item_value|
+        aggregate_first_interval(aggregate_data, current_time, group_item_value)
+
+        # Calculate subsecuents aggregations
+        group_item_value['intervals'].keys[1..-1].each do |s_interval|
+          aggregate_subsequents_intervals(aggregate_data, current_time, group_item_value, s_interval)
+        end
+      end
+
+      # @log.debug aggregate_data
+      aggregate_data unless aggregate_data.empty?
+      # rescue Exception => e
+      #   $log.error e
+    end
+
+    private
+
+    def aggregate_first_interval(aggregate_data, current_time, group_item_value)
+      group_item_value['aggregate_fields'].each do |aggregator_item_key, aggregator_item_value|
+        # If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
+        @processing_mode == :batch ? limit_time = 0 : limit_time = aggregator_item_value[:time_started] + @intervals[0] + @keep_interval
+
+        # Is this data ready to aggregate (based on the ingest time), if @processing_mode is batch limit_time is 0
+        next unless current_time >= limit_time
+
+        aggregator_data = {}
+        aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
+        aggregator_data.merge!(group_item_value['group_fields'])
+
+        aggregator_data['time'] = aggregator_item_key
+        aggregator_data['processed'] = aggregator_item_value['processed']
+        if @aggregator_name
+          aggregator_data['aggregator_id'] = @aggregator_name
+        end
+
+        # Add entry in accumulative aggregation hash
+        group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+          create_aggregation_hash(aggregator_item_key, aggregator_item_value, group_item_value, interval_secs)
+        end
+
+        aggregator_item_value.each do |aggregate_field_key, aggregate_field_value|
+          execute_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+        end
+
+        group_item_value['aggregate_fields'].delete(aggregator_item_key)
+        if aggregate_data[group_item_value['intervals'].keys[0]].nil?
+          aggregate_data[group_item_value['intervals'].keys[0]] = []
+        end
+        aggregate_data[group_item_value['intervals'].keys[0]] << aggregator_data
+      end
+    end
+
+    def execute_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+      # Create field metadata for subsecuents aggregations
+      create_metadata_aggregation(aggregate_field_key,
+                                  aggregate_field_value,
+                                  aggregator_data,
+                                  aggregator_item_key,
+                                  group_item_value)
+      # Aggregate data
+      if aggregate_field_value.is_a?(Array)
+        @aggregation_names.each do |operation|
+          data = aggregate_field_value.method(operation).call
+          aggregator_data["#{aggregate_field_key}_#{operation}"] = data
+
+          # Add aggregated data to interval
+          group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+            interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+            interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+            interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] << data
           end
+        end
+      end
+    end
 
-      @aggregate_field_names.each {|field_name|
-        aggregate_values = []
-        if aggregator_item["aggregate_fields"][aggregator_hash_key].has_key?(field_name)
-          aggregate_values = aggregator_item["aggregate_fields"][aggregator_hash_key][field_name]
-        end
-        if record[field_name].is_a?(Integer) or record[field_name].is_a?(Float)
-          aggregate_values << record[field_name]
-        else
-          aggregate_values << 0
-        end
-        aggregator_item["aggregate_fields"][aggregator_hash_key][field_name] = aggregate_values
-      }
+    def create_metadata_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+      group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+        interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+        interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+
+        # @log.debug interval_aggregator_item_value
+        next unless !interval_aggregator_item_value['aggregate_fields'].key?(aggregate_field_key) && aggregate_field_value.is_a?(Array)
+
+        interval_aggregator_item_value['aggregate_fields'][aggregate_field_key] = {}
+        @aggregation_names.each do |operation|
+          interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] = []
+        end
+      end
+    end
+
+    def create_aggregation_hash(aggregator_item_key, aggregator_item_value, group_item_value, interval_secs)
+      interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+      # @log.debug "interval_aggregator_item_key: #{interval_aggregator_item_key}"
+
+      if interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+        if interval_aggregator_item_value[:time_started] < aggregator_item_value[:time_started]
+          interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
+        end
+        interval_aggregator_item_value['processed'] += aggregator_item_value['processed']
+        # @log.debug interval_aggregator_item_value
+      else
+        interval_aggregator_item_value = {}
+        interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
+        interval_aggregator_item_value['aggregate_fields'] = {}
+        interval_aggregator_item_value['processed'] = aggregator_item_value['processed']
+        group_item_value['intervals'][interval_secs][interval_aggregator_item_key] = interval_aggregator_item_value
+        # @log.debug interval_aggregator_item_value
       end
+    end
 
-    def aggregate_data
-      @aggregator
+    def aggregate_subsequents_intervals(aggregate_data, current_time, group_item_value, s_interval)
+      group_item_value['intervals'][s_interval].each do |aggregator_item_key, aggregator_item_value|
+        acumulative_aggregation(aggregate_data, aggregator_item_key, aggregator_item_value, current_time, group_item_value, s_interval)
       end
+    end
+
+    def acumulative_aggregation(aggregate_data, aggregator_item_key, aggregator_item_value, current_time, group_item_value, s_interval)
+      interval = s_interval.to_i
+      # If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
+      limit_time = @processing_mode == :batch ? 0 : aggregator_item_value[:time_started] + interval + @keep_interval
+
+      # @log.debug "processing_mode:#{@processing_mode} limit_time:#{limit_time}"
+
+      unless current_time < limit_time
+        aggregator_data = {}
+        aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
+        aggregator_data.merge!(group_item_value['group_fields'])
 
-    def aggregate_events
-      aggregate_data = {}
-
-      #@log.debug @aggregator
-      #@aggregator_mutex.synchronize do
-      current_time = Time.now.to_i
-      @aggregator.each {|group_item_key,group_item_value|
-        group_item_value["aggregate_fields"].each {|aggregator_item_key,aggregator_item_value|
-          #If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
-          @processing_mode == :batch ? limit_time = 0 : limit_time = aggregator_item_value[:time_started] + @intervals[0] + @keep_interval
-
-          #Is this data ready to aggregate (based on the ingest time), if @processing_mode is batch limit_time is 0
-          if current_time >= limit_time
-            aggregator_data = {}
-            aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
-            aggregator_data.merge!(group_item_value["group_fields"])
-
-            aggregator_data["time"] = aggregator_item_key
-            aggregator_data["processed"] = aggregator_item_value["processed"]
-            aggregator_data["aggregator_id"] = @aggregator_name if @aggregator_name
-
-            #Add entry in accumulative aggregation hash
-            group_item_value['intervals'].keys[1..-1].each{|interval_secs|
-              interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
-              #@log.debug "interval_aggregator_item_key: #{interval_aggregator_item_key}"
-
-              if interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
-                interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started] if interval_aggregator_item_value[:time_started] < aggregator_item_value[:time_started]
-                interval_aggregator_item_value["processed"] += aggregator_item_value["processed"]
-                #@log.debug interval_aggregator_item_value
-              else
-                interval_aggregator_item_value = {}
-                interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
-                interval_aggregator_item_value["aggregate_fields"]={}
-                interval_aggregator_item_value["processed"] = aggregator_item_value["processed"]
-                group_item_value['intervals'][interval_secs][interval_aggregator_item_key] = interval_aggregator_item_value
-                #@log.debug interval_aggregator_item_value
-              end
-            }
-
-            aggregator_item_value.each { |aggregate_field_key,aggregate_field_value|
-              #Create field metadata for subsecuents aggregations
-              group_item_value['intervals'].keys[1..-1].each{|interval_secs|
-                interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
-                interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
-                #@log.debug interval_aggregator_item_value
-                if ! interval_aggregator_item_value["aggregate_fields"].has_key?(aggregate_field_key) && aggregate_field_value.is_a?(Array)
-                  interval_aggregator_item_value["aggregate_fields"][aggregate_field_key]={}
-                  @aggregation_names.each {|operation|
-                    interval_aggregator_item_value["aggregate_fields"][aggregate_field_key][operation]=[]
-                  }
-                end
-              }
-
-              #Aggregate data
-              if aggregate_field_value.is_a?(Array)
-                @aggregation_names.each {|operation|
-                  data = aggregate_field_value.method(operation).call
-                  aggregator_data["#{aggregate_field_key}_#{operation}"] = data
-
-                  #Add aggregated data to interval
-                  group_item_value['intervals'].keys[1..-1].each{|interval_secs|
-                    interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
-                    interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
-                    interval_aggregator_item_value["aggregate_fields"][aggregate_field_key][operation] << data
-                  }
-                }
-              end
-            }
-
-            group_item_value["aggregate_fields"].delete(aggregator_item_key)
-            aggregate_data[group_item_value['intervals'].keys[0]] =[] if aggregate_data[group_item_value['intervals'].keys[0]].nil?
-            aggregate_data[group_item_value['intervals'].keys[0]] << aggregator_data
-          end
-        }
-
-        #Calculate subsecuents aggregations
-        group_item_value["intervals"].keys[1..-1].each {|s_interval|
-          group_item_value["intervals"][s_interval].each{|aggregator_item_key,aggregator_item_value|
-            interval = s_interval.to_i
-            #If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
-            @processing_mode == :batch ? limit_time = 0 : limit_time = aggregator_item_value[:time_started] + interval + @keep_interval
-
-            #@log.debug "processing_mode:#{@processing_mode} limit_time:#{limit_time}"
-
-            if current_time >= limit_time
-              aggregator_data = {}
-              aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
-              aggregator_data.merge!(group_item_value["group_fields"])
-
-              aggregator_data["time"] = aggregator_item_key
-              aggregator_data["processed"] = aggregator_item_value["processed"]
-              aggregator_data["aggregator_id"] = @aggregator_name if @aggregator_name
-              aggregator_item_value["aggregate_fields"].each{|field_name,field_data|
-                field_data.each{|operation,vector|
-                  case operation
-                  when 'max','min','mean','median'
-                    data = vector.method(operation).call
-                  else
-                    data = vector.median
-                  end
-                  aggregator_data["#{field_name}_#{operation}"] = data
-                }
-              }
-              #@log.debug aggregator_item_value
-              #@log.debug aggregator_data
-              group_item_value["intervals"][s_interval].delete(aggregator_item_key)
-              aggregate_data[s_interval] =[] if aggregate_data[s_interval].nil?
-              aggregate_data[s_interval] << aggregator_data
-            end
-          }
-        }
-      }
-
-      #@log.debug aggregate_data
-      unless aggregate_data.empty?
-        aggregate_data
+        aggregator_data['time'] = aggregator_item_key
+        aggregator_data['processed'] = aggregator_item_value['processed']
+        if @aggregator_name
+          aggregator_data['aggregator_id'] = @aggregator_name
+        end
+        aggregator_item_value['aggregate_fields'].each do |field_name, field_data|
+          field_data.each do |operation, vector|
+            case operation
+            when 'max', 'min', 'mean', 'median'
+              data = vector.method(operation).call
+            else
+              data = vector.median
+            end
+            aggregator_data["#{field_name}_#{operation}"] = data
           end
-      #rescue Exception => e
-      # $log.error e
+        end
+        # @log.debug aggregator_item_value
+        # @log.debug aggregator_data
+        group_item_value['intervals'][s_interval].delete(aggregator_item_key)
+        aggregate_data[s_interval] = [] if aggregate_data[s_interval].nil?
+        aggregate_data[s_interval] << aggregator_data
       end
+    end
   end
 end
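
The 0.0.3 changes to this file are a style cleanup (single-quoted strings, do...end blocks instead of braces, frozen string constants, key?/zero? predicates) plus a decomposition of the old monolithic aggregate_events into the private helpers aggregate_first_interval, execute_aggregation, create_metadata_aggregation, create_aggregation_hash, aggregate_subsequents_intervals, and acumulative_aggregation; behavior is otherwise unchanged. Two quirks visible above survive the rewrite: the group_field_names and aggregate_field_names checks still test aggregation_names, and the keep_interval default references DEFAULT_KEEP_INTERVAL, which is not defined in this file. A minimal usage sketch, assuming the gem is installed and passing keep_interval explicitly to avoid that undefined default:

  require 'logger'
  require 'dataoperations-aggregate'

  aggregator = DataOperations::Aggregate.new(
    aggregation_names: %w[sum max],       # must be a subset of VALID_AGGREGATIONS
    group_field_names: ['host'],
    aggregate_field_names: ['latency'],
    intervals: [10, 60],                  # 60 is a multiple of the first interval, as required
    keep_interval: 10,                    # explicit, since DEFAULT_KEEP_INTERVAL is undefined here
    processing_mode: :batch               # :batch sets limit_time to 0, so flushing is immediate
  )

  # Hypothetical record; 'timestamp' matches DEFAULT_TIME_FIELD and DEFAULT_TIME_FORMAT
  aggregator.add_events(
    'timestamp' => Time.now.strftime('%Y-%m-%dT%H:%M:%S.%L%:z'),
    'host'      => 'web-1',
    'latency'   => 42.5
  )

  # Returns a hash keyed by interval ('10', '60'); each entry is an array of records
  # carrying the group fields, 'processed', and fields such as 'latency_sum'/'latency_max'.
  p aggregator.aggregate_events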
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dataoperations-aggregate
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Victor Guillen
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-04-21 00:00:00.000000000 Z
+date: 2020-04-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: descriptive_statistics
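
The gemspec changes are limited to the version bump and the release date; the descriptive_statistics dependency is untouched. A sketch of a Gemfile entry for picking up the new release (rubygems.org assumed as the source):

  source 'https://rubygems.org'
  gem 'dataoperations-aggregate', '0.0.3'  # version taken from the metadata above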