dataoperations-aggregate 0.0.2 → 0.0.3

Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/dataoperations-aggregate.rb +280 -252
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: cb5ea4c24213f92e05bb0061911b37aa6b5732c71e5ceb3b693271bb4e9ac853
- data.tar.gz: 946a56b993434a1c42e8f7a144ac4b1c25bfe52bc733d4eab3512946b183f45a
+ metadata.gz: af4af69996bbbaf33b749eded827f3d6f695e44bfcd1be0bda98ed8e027cd0f1
+ data.tar.gz: 94d67f60c8ff46462c84a9d0a186c6d240d99ca533d4db624cef97895efc5050
  SHA512:
- metadata.gz: cad19717f64525e8e0ff0f0dcc7805d29131a40230db71a9da422e44ea2c94b820ad1450600e1295a6ff6c53f4d2a59da501bed79e2d1618f27d590a724dbd80
- data.tar.gz: d5c0c95610cd59fae0dd38784a465e79d5ad542ff2a12d4453fe026fa7923796ab64112a012e1a10119ac519b452256756e50e816a289ee98f1fb8776a7d39d5
+ metadata.gz: a4cfe7be327d99b497d134d3ea3fad255aa46a8bf5b0153f96e5d2cd49bcc41ff444ada6b0cc08d2c0f2a398ff90f447f1959f7945dc3e0b8f506c1a90321c09
+ data.tar.gz: 754c6c3920f28c29d9c03dbd7a1c5a00fbd71f560f72e8e000dca1b7eb68283c61ca85e6bfde1a689d68dd1d2953d593549aa0ec72dc37573feddd3320ffce1b
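
These digests cover the two archives packed inside the published .gem. As a minimal sketch (the local file path is hypothetical), the SHA256 values above can be recomputed from a fetched gem with Ruby's stdlib:

  require 'digest'
  require 'rubygems/package'

  # A .gem file is a plain tar holding metadata.gz and data.tar.gz; hash each
  # entry and compare against the SHA256 section of checksums.yaml above.
  File.open('dataoperations-aggregate-0.0.3.gem', 'rb') do |io|
    Gem::Package::TarReader.new(io) do |tar|
      tar.each do |entry|
        next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
        puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
      end
    end
  end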
data/lib/dataoperations-aggregate.rb CHANGED
@@ -3,277 +3,305 @@ require 'time'
  require 'descriptive_statistics'
  module DataOperations
  class Aggregate
- DEFAULT_TIME_FORMAT='%Y-%m-%dT%H:%M:%S.%L%:z'
- DEFAULT_TIME_FIELD='timestamp'
- DEFAULT_OUTPUT_TIME_FORMAT='%Y-%m-%dT%H:%M:%S.%L%z'
- DEFAULT_INTERVALS=[10]
- DEFAULT_FLUSH_INTERVAL=5
- DEFAULT_PROCESSING_MODE=:batch
- DEFAULT_FIELD_NO_DATA_VALUE='no_data'
- DEFAULT_AGGREGATIONS=['sum','min','max','mean','median','variance','standard_deviation']
- VALID_AGGREGATIONS = ['sum','min','max','mean','median','variance','standard_deviation']
- DEFAULT_HASH_TIME_FORMAT = '%Y-%m-%dT%H'
- DEFAULT_INERVAL_SECONDS = 3600
-
- def initialize(aggregator: {},
- time_format: DEFAULT_TIME_FORMAT,
- time_field: DEFAULT_TIME_FIELD,
- output_time_format: DEFAULT_OUTPUT_TIME_FORMAT,
- intervals:DEFAULT_INTERVALS,
- flush_interval:DEFAULT_FLUSH_INTERVAL,
- keep_interval:DEFAULT_KEEP_INTERVAL,
- field_no_data_value:DEFAULT_FIELD_NO_DATA_VALUE,
- processing_mode:DEFAULT_PROCESSING_MODE,
- aggregator_name:nil,
- log:Logger.new(STDOUT),
- aggregation_names:,
- group_field_names:,
- aggregate_field_names:
- )
- @aggregator = aggregator
- @time_format = time_format
- @time_field = time_field
- @output_time_format = output_time_format
- @intervals = intervals.uniq.sort!
- @flush_interval = flush_interval
- @keep_interval = keep_interval
- @field_no_data_value = field_no_data_value
- @processing_mode = processing_mode
- @aggregator_name = aggregator_name
-
-
- if aggregation_names.nil? || ! aggregation_names.is_a?(Array)
- raise "Configuration error, aggregation_names must be specified and Array"
- end
- if group_field_names.nil? || ! aggregation_names.is_a?(Array)
- raise "Configuration error, group_field_names must be specified and Array"
- end
- if aggregate_field_names.nil? || ! aggregation_names.is_a?(Array)
- raise "Configuration error, aggregate_field_names must be specified and Array"
- end
+ DEFAULT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%L%:z'.freeze
+ DEFAULT_TIME_FIELD = 'timestamp'.freeze
+ DEFAULT_OUTPUT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%L%z'.freeze
+ DEFAULT_INTERVALS = [10].freeze
+ DEFAULT_FLUSH_INTERVAL = 5
+ DEFAULT_PROCESSING_MODE = :batch
+ DEFAULT_FIELD_NO_DATA_VALUE = 'no_data'.freeze
+ DEFAULT_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
+ VALID_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
+ DEFAULT_HASH_TIME_FORMAT = '%Y-%m-%dT%H'.freeze
+ DEFAULT_INERVAL_SECONDS = 3600
+
+ def initialize(aggregator: {},
+ time_format: DEFAULT_TIME_FORMAT,
+ time_field: DEFAULT_TIME_FIELD,
+ output_time_format: DEFAULT_OUTPUT_TIME_FORMAT,
+ intervals: DEFAULT_INTERVALS,
+ flush_interval: DEFAULT_FLUSH_INTERVAL,
+ keep_interval: DEFAULT_KEEP_INTERVAL,
+ field_no_data_value: DEFAULT_FIELD_NO_DATA_VALUE,
+ processing_mode: DEFAULT_PROCESSING_MODE,
+ aggregator_name: nil,
+ log: Logger.new(STDOUT),
+ aggregation_names:,
+ group_field_names:,
+ aggregate_field_names:
+ )
+ @aggregator = aggregator
+ @time_format = time_format
+ @time_field = time_field
+ @output_time_format = output_time_format
+ @intervals = intervals.uniq.sort!
+ @flush_interval = flush_interval
+ @keep_interval = keep_interval
+ @field_no_data_value = field_no_data_value
+ @processing_mode = processing_mode
+ @aggregator_name = aggregator_name
 
- @log = log
 
- @hash_time_format = DEFAULT_HASH_TIME_FORMAT
- @interval_seconds = DEFAULT_INERVAL_SECONDS
+ if aggregation_names.nil? || !aggregation_names.is_a?(Array)
+ raise 'Configuration error, aggregation_names must be specified and Array'
+ end
+ if group_field_names.nil? || !aggregation_names.is_a?(Array)
+ raise 'Configuration error, group_field_names must be specified and Array'
+ end
+ if aggregate_field_names.nil? || !aggregation_names.is_a?(Array)
+ raise 'Configuration error, aggregate_field_names must be specified and Array'
+ end
 
- @aggregation_names = aggregation_names
- @group_field_names = group_field_names
- @aggregate_field_names = aggregate_field_names
+ @log = log
 
- @aggregation_names.each {|operation|
- if ! VALID_AGGREGATIONS.include?(operation)
- raise "aggregations must set any combination of sum,min,max,mean,median,variance,standard_deviation"
- end
- }
- @intervals.each {|interval|
- if ! (interval % @intervals[0] == 0)
- raise "interval: #{interval} must be multiple of first interval: #{@intervals[0]}"
- end
- }
+ @hash_time_format = DEFAULT_HASH_TIME_FORMAT
+ @interval_seconds = DEFAULT_INERVAL_SECONDS
 
- #TODO:
- # - Duplicate intervals - Done
- # - Sort intervals - Done
- # - Validate aggregation_names, group_field_names, aggregate_field_names
+ @aggregation_names = aggregation_names
+ @group_field_names = group_field_names
+ @aggregate_field_names = aggregate_field_names
+
+ @aggregation_names.each do |operation|
+ unless VALID_AGGREGATIONS.include?(operation)
+ raise 'aggregations must set any combination of sum,min,max,mean,median,variance,standard_deviation'
+ end
+ end
+ @intervals.each do |interval|
+ unless (interval % @intervals[0]).zero?
+ raise "interval: #{interval} must be multiple of first interval: #{@intervals[0]}"
+ end
  end
 
- def log_level(log_level)
- @log.level = log_level
+ # TODO:
+ # - Duplicate intervals - Done
+ # - Sort intervals - Done
+ # - Validate aggregation_names, group_field_names, aggregate_field_names
+ end
+
+ def log_level(log_level)
+ @log.level = log_level
+ end
+
+ def add_events(record)
+ timestamp = nil
+ if !record.key?(@time_field) || !(timestamp = DateTime.strptime(record[@time_field], @time_format).to_time.to_i)
+ timestamp = DateTime.now.to_time.to_i
  end
 
- def add_events(record)
- timestamp = nil
- if ! record.has_key?(@time_field) || ! (timestamp = DateTime.strptime(record[@time_field],@time_format).to_time.to_i)
- timestamp = DateTime.now.to_time.to_i
- end
+ current_interval_seconds = (timestamp / @intervals[0]) * @intervals[0]
+ aggregator_hash_key = current_interval_seconds
 
- current_interval_seconds = (timestamp / @intervals[0]) * @intervals[0]
- aggregator_hash_key = current_interval_seconds
+ hash_group_key = nil
+ @group_field_names.each do |field_name|
+ hash_group_key = !hash_group_key.nil? ? "#{hash_group_key}_#{field_name}:#{record[field_name]}" : "#{field_name}:#{record[field_name]}"
+ end
 
- hash_group_key = nil
- @group_field_names.each {|field_name|
- if ! hash_group_key.nil?
- hash_group_key = "#{hash_group_key}_#{field_name}:#{record[field_name]}"
- else
- hash_group_key = "#{field_name}:#{record[field_name]}"
- end
- }
-
- aggregator_item={}
- if @aggregator.has_key?(hash_group_key)
- aggregator_item = @aggregator[hash_group_key]
- else
- group_detail = {}
- aggregate_detail = {}
- interval_detail = {}
- @group_field_names.each {|field_name|
- if record.has_key?(field_name)
- group_detail[field_name] = record[field_name]
- else
- group_detail[field_name] = @field_no_data_value
- end
- }
-
- #Add interval empty data
- @intervals.each{|interval|
- interval_detail[interval.to_s]={}
- }
-
- aggregator_item["group_fields"]=group_detail
- aggregator_item["aggregate_fields"]=aggregate_detail
- aggregator_item["intervals"]=interval_detail
-
- @aggregator[hash_group_key]=aggregator_item
- end
+ aggregator_item = {}
+ if @aggregator.key?(hash_group_key)
+ aggregator_item = @aggregator[hash_group_key]
+ else
+ group_detail = {}
+ aggregate_detail = {}
+ interval_detail = {}
+ @group_field_names.each do |field_name|
+ group_detail[field_name] = record.key?(field_name) ? record[field_name] : @field_no_data_value
+ end
+
+ # Add interval empty data
+ @intervals.each do |interval|
+ interval_detail[interval.to_s] = {}
+ end
+
+ aggregator_item['group_fields'] = group_detail
+ aggregator_item['aggregate_fields'] = aggregate_detail
+ aggregator_item['intervals'] = interval_detail
+
+ @aggregator[hash_group_key] = aggregator_item
+ end
+
+ if !aggregator_item['aggregate_fields'].key?(aggregator_hash_key)
+ hash_aggregator = {}
+ hash_aggregator[:time_started] = Time.now.to_i
+ hash_aggregator['processed'] = 1
+ aggregator_item['aggregate_fields'][aggregator_hash_key] = hash_aggregator
+ else
+ aggregator_item['aggregate_fields'][aggregator_hash_key]['processed'] += 1
+ end
+
+ @aggregate_field_names.each do |field_name|
+ aggregate_values = []
+ if aggregator_item['aggregate_fields'][aggregator_hash_key].key?(field_name)
+ aggregate_values = aggregator_item['aggregate_fields'][aggregator_hash_key][field_name]
+ end
+ if record[field_name].is_a?(Integer) || record[field_name].is_a?(Float)
+ aggregate_values << record[field_name]
+ else
+ aggregate_values << 0
+ end
+ aggregator_item['aggregate_fields'][aggregator_hash_key][field_name] = aggregate_values
+ end
+ end
+
+ def aggregate_data
+ @aggregator
+ end
 
- if ! aggregator_item["aggregate_fields"].has_key?(aggregator_hash_key)
- hash_aggregator = {}
- hash_aggregator[:time_started]=Time.now.to_i
- hash_aggregator["processed"]=1
- aggregator_item["aggregate_fields"][aggregator_hash_key]=hash_aggregator
- else
- aggregator_item["aggregate_fields"][aggregator_hash_key]["processed"]+=1
+ def aggregate_events
+ aggregate_data = {}
+
+ # @log.debug @aggregator
+ # @aggregator_mutex.synchronize do
+ current_time = Time.now.to_i
+ @aggregator.each do |group_item_key, group_item_value|
+ aggregate_first_interval(aggregate_data, current_time, group_item_value)
+
+ # Calculate subsecuents aggregations
+ group_item_value['intervals'].keys[1..-1].each do |s_interval|
+ aggregate_subsequents_intervals(aggregate_data, current_time, group_item_value, s_interval)
+ end
+ end
+
+ # @log.debug aggregate_data
+ aggregate_data unless aggregate_data.empty?
+ # rescue Exception => e
+ # $log.error e
+ end
+
+ private
+
+ def aggregate_first_interval(aggregate_data, current_time, group_item_value)
+ group_item_value['aggregate_fields'].each do |aggregator_item_key, aggregator_item_value|
+ # If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
+ @processing_mode == :batch ? limit_time = 0 : limit_time = aggregator_item_value[:time_started] + @intervals[0] + @keep_interval
+
+ # Is this data ready to aggregate (based on the ingest time), if @processing_mode is batch limit_time is 0
+ next unless current_time >= limit_time
+
+ aggregator_data = {}
+ aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
+ aggregator_data.merge!(group_item_value['group_fields'])
+
+ aggregator_data['time'] = aggregator_item_key
+ aggregator_data['processed'] = aggregator_item_value['processed']
+ if @aggregator_name
+ aggregator_data['aggregator_id'] = @aggregator_name
+ end
+
+ # Add entry in accumulative aggregation hash
+ group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+ create_aggregation_hash(aggregator_item_key, aggregator_item_value, group_item_value, interval_secs)
+ end
+
+ aggregator_item_value.each do |aggregate_field_key, aggregate_field_value|
+ execute_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+ end
+
+ group_item_value['aggregate_fields'].delete(aggregator_item_key)
+ if aggregate_data[group_item_value['intervals'].keys[0]].nil?
+ aggregate_data[group_item_value['intervals'].keys[0]] = []
+ end
+ aggregate_data[group_item_value['intervals'].keys[0]] << aggregator_data
+ end
+ end
+
+ def execute_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+ # Create field metadata for subsecuents aggregations
+ create_metadata_aggregation(aggregate_field_key,
+ aggregate_field_value,
+ aggregator_data,
+ aggregator_item_key,
+ group_item_value)
+ # Aggregate data
+ if aggregate_field_value.is_a?(Array)
+ @aggregation_names.each do |operation|
+ data = aggregate_field_value.method(operation).call
+ aggregator_data["#{aggregate_field_key}_#{operation}"] = data
+
+ # Add aggregated data to interval
+ group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+ interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+ interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+ interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] << data
  end
+ end
+ end
+ end
 
- @aggregate_field_names.each {|field_name|
- aggregate_values = []
- if aggregator_item["aggregate_fields"][aggregator_hash_key].has_key?(field_name)
- aggregate_values = aggregator_item["aggregate_fields"][aggregator_hash_key][field_name]
- end
- if record[field_name].is_a?(Integer) or record[field_name].is_a?(Float)
- aggregate_values << record[field_name]
- else
- aggregate_values << 0
- end
- aggregator_item["aggregate_fields"][aggregator_hash_key][field_name] = aggregate_values
- }
+ def create_metadata_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+ group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+ interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+ interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+
+ # @log.debug interval_aggregator_item_value
+ next unless !interval_aggregator_item_value['aggregate_fields'].key?(aggregate_field_key) && aggregate_field_value.is_a?(Array)
+
+ interval_aggregator_item_value['aggregate_fields'][aggregate_field_key] = {}
+ @aggregation_names.each do |operation|
+ interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] = []
+ end
+ end
+ end
+
+ def create_aggregation_hash(aggregator_item_key, aggregator_item_value, group_item_value, interval_secs)
+ interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+ # @log.debug "interval_aggregator_item_key: #{interval_aggregator_item_key}"
+
+ if interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+ if interval_aggregator_item_value[:time_started] < aggregator_item_value[:time_started]
+ interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
+ end
+ interval_aggregator_item_value['processed'] += aggregator_item_value['processed']
+ # @log.debug interval_aggregator_item_value
+ else
+ interval_aggregator_item_value = {}
+ interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
+ interval_aggregator_item_value['aggregate_fields'] = {}
+ interval_aggregator_item_value['processed'] = aggregator_item_value['processed']
+ group_item_value['intervals'][interval_secs][interval_aggregator_item_key] = interval_aggregator_item_value
+ # @log.debug interval_aggregator_item_value
  end
+ end
 
- def aggregate_data
- @aggregator
+ def aggregate_subsequents_intervals(aggregate_data, current_time, group_item_value, s_interval)
+ group_item_value['intervals'][s_interval].each do |aggregator_item_key, aggregator_item_value|
+ acumulative_aggregation(aggregate_data, aggregator_item_key, aggregator_item_value, current_time, group_item_value, s_interval)
  end
+ end
+
+ def acumulative_aggregation(aggregate_data, aggregator_item_key, aggregator_item_value, current_time, group_item_value, s_interval)
+ interval = s_interval.to_i
+ # If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
+ limit_time = @processing_mode == :batch ? 0 : aggregator_item_value[:time_started] + interval + @keep_interval
+
+ # @log.debug "processing_mode:#{@processing_mode} limit_time:#{limit_time}"
+
+ unless current_time < limit_time
+ aggregator_data = {}
+ aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
+ aggregator_data.merge!(group_item_value['group_fields'])
 
- def aggregate_events
- aggregate_data = {}
-
- #@log.debug @aggregator
- #@aggregator_mutex.synchronize do
- current_time = Time.now.to_i
- @aggregator.each {|group_item_key,group_item_value|
- group_item_value["aggregate_fields"].each {|aggregator_item_key,aggregator_item_value|
- #If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
- @processing_mode == :batch ? limit_time = 0 : limit_time = aggregator_item_value[:time_started] + @intervals[0] + @keep_interval
-
- #Is this data ready to aggregate (based on the ingest time), if @processing_mode is batch limit_time is 0
- if current_time >= limit_time
- aggregator_data = {}
- aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
- aggregator_data.merge!(group_item_value["group_fields"])
-
- aggregator_data["time"] = aggregator_item_key
- aggregator_data["processed"] = aggregator_item_value["processed"]
- aggregator_data["aggregator_id"] = @aggregator_name if @aggregator_name
-
- #Add entry in accumulative aggregation hash
- group_item_value['intervals'].keys[1..-1].each{|interval_secs|
- interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
- #@log.debug "interval_aggregator_item_key: #{interval_aggregator_item_key}"
-
- if interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
- interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started] if interval_aggregator_item_value[:time_started] < aggregator_item_value[:time_started]
- interval_aggregator_item_value["processed"] += aggregator_item_value["processed"]
- #@log.debug interval_aggregator_item_value
- else
- interval_aggregator_item_value = {}
- interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
- interval_aggregator_item_value["aggregate_fields"]={}
- interval_aggregator_item_value["processed"] = aggregator_item_value["processed"]
- group_item_value['intervals'][interval_secs][interval_aggregator_item_key] = interval_aggregator_item_value
- #@log.debug interval_aggregator_item_value
- end
- }
-
- aggregator_item_value.each { |aggregate_field_key,aggregate_field_value|
- #Create field metadata for subsecuents aggregations
- group_item_value['intervals'].keys[1..-1].each{|interval_secs|
- interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
- interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
- #@log.debug interval_aggregator_item_value
- if ! interval_aggregator_item_value["aggregate_fields"].has_key?(aggregate_field_key) && aggregate_field_value.is_a?(Array)
- interval_aggregator_item_value["aggregate_fields"][aggregate_field_key]={}
- @aggregation_names.each {|operation|
- interval_aggregator_item_value["aggregate_fields"][aggregate_field_key][operation]=[]
- }
- end
- }
-
- #Aggregate data
- if aggregate_field_value.is_a?(Array)
- @aggregation_names.each {|operation|
- data = aggregate_field_value.method(operation).call
- aggregator_data["#{aggregate_field_key}_#{operation}"] = data
-
- #Add aggregated data to interval
- group_item_value['intervals'].keys[1..-1].each{|interval_secs|
- interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
- interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
- interval_aggregator_item_value["aggregate_fields"][aggregate_field_key][operation] << data
- }
- }
- end
- }
-
- group_item_value["aggregate_fields"].delete(aggregator_item_key)
- aggregate_data[group_item_value['intervals'].keys[0]] =[] if aggregate_data[group_item_value['intervals'].keys[0]].nil?
- aggregate_data[group_item_value['intervals'].keys[0]] << aggregator_data
- end
- }
-
- #Calculate subsecuents aggregations
- group_item_value["intervals"].keys[1..-1].each {|s_interval|
- group_item_value["intervals"][s_interval].each{|aggregator_item_key,aggregator_item_value|
- interval = s_interval.to_i
- #If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
- @processing_mode == :batch ? limit_time = 0 : limit_time = aggregator_item_value[:time_started] + interval + @keep_interval
-
- #@log.debug "processing_mode:#{@processing_mode} limit_time:#{limit_time}"
-
- if current_time >= limit_time
- aggregator_data = {}
- aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
- aggregator_data.merge!(group_item_value["group_fields"])
-
- aggregator_data["time"] = aggregator_item_key
- aggregator_data["processed"] = aggregator_item_value["processed"]
- aggregator_data["aggregator_id"] = @aggregator_name if @aggregator_name
- aggregator_item_value["aggregate_fields"].each{|field_name,field_data|
- field_data.each{|operation,vector|
- case operation
- when 'max','min','mean','median'
- data = vector.method(operation).call
- else
- data = vector.median
- end
- aggregator_data["#{field_name}_#{operation}"] = data
- }
- }
- #@log.debug aggregator_item_value
- #@log.debug aggregator_data
- group_item_value["intervals"][s_interval].delete(aggregator_item_key)
- aggregate_data[s_interval] =[] if aggregate_data[s_interval].nil?
- aggregate_data[s_interval] << aggregator_data
- end
- }
- }
- }
-
- #@log.debug aggregate_data
- unless aggregate_data.empty?
- aggregate_data
+ aggregator_data['time'] = aggregator_item_key
+ aggregator_data['processed'] = aggregator_item_value['processed']
+ if @aggregator_name
+ aggregator_data['aggregator_id'] = @aggregator_name
+ end
+ aggregator_item_value['aggregate_fields'].each do |field_name, field_data|
+ field_data.each do |operation, vector|
+ case operation
+ when 'max', 'min', 'mean', 'median'
+ data = vector.method(operation).call
+ else
+ data = vector.median
+ end
+ aggregator_data["#{field_name}_#{operation}"] = data
  end
- #rescue Exception => e
- # $log.error e
+ end
+ # @log.debug aggregator_item_value
+ # @log.debug aggregator_data
+ group_item_value['intervals'][s_interval].delete(aggregator_item_key)
+ aggregate_data[s_interval] = [] if aggregate_data[s_interval].nil?
+ aggregate_data[s_interval] << aggregator_data
  end
+ end
  end
  end
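
For reviewers, a minimal usage sketch of the public API touched by this diff (Aggregate#initialize, #add_events, #aggregate_events). The field names and values are hypothetical; keep_interval is passed explicitly because its default, DEFAULT_KEEP_INTERVAL, is not defined anywhere in this file, so omitting it would raise a NameError when the default is evaluated:

  require 'logger'
  require 'dataoperations-aggregate'

  aggregate = DataOperations::Aggregate.new(
    aggregation_names: %w[sum max mean],    # must be a subset of VALID_AGGREGATIONS
    group_field_names: %w[host service],    # events are grouped by these fields
    aggregate_field_names: %w[duration],    # numeric fields to aggregate (non-numeric values count as 0)
    intervals: [10, 60],                    # each interval must be a multiple of the first
    keep_interval: 10,
    processing_mode: :batch                 # :batch aggregates immediately on aggregate_events
  )

  2.times do |i|
    aggregate.add_events(
      'timestamp' => Time.now.strftime('%Y-%m-%dT%H:%M:%S.%L%:z'),
      'host' => 'web-01', 'service' => 'api', 'duration' => 40 + i
    )
  end

  # Result keys are interval lengths in seconds ('10', '60'); each entry carries
  # the group fields plus 'processed' and duration_sum/duration_max/duration_mean.
  p aggregate.aggregate_events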
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: dataoperations-aggregate
  version: !ruby/object:Gem::Version
- version: 0.0.2
+ version: 0.0.3
  platform: ruby
  authors:
  - Victor Guillen
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-04-21 00:00:00.000000000 Z
+ date: 2020-04-26 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: descriptive_statistics
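
To pick up this release in a project, a minimal Gemfile entry (the exact version constraint is the consumer's choice):

  # Gemfile
  gem 'dataoperations-aggregate', '0.0.3'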