dataoperations-aggregate 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/dataoperations-aggregate.rb +280 -252
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af4af69996bbbaf33b749eded827f3d6f695e44bfcd1be0bda98ed8e027cd0f1
|
4
|
+
data.tar.gz: 94d67f60c8ff46462c84a9d0a186c6d240d99ca533d4db624cef97895efc5050
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a4cfe7be327d99b497d134d3ea3fad255aa46a8bf5b0153f96e5d2cd49bcc41ff444ada6b0cc08d2c0f2a398ff90f447f1959f7945dc3e0b8f506c1a90321c09
|
7
|
+
data.tar.gz: 754c6c3920f28c29d9c03dbd7a1c5a00fbd71f560f72e8e000dca1b7eb68283c61ca85e6bfde1a689d68dd1d2953d593549aa0ec72dc37573feddd3320ffce1b
|
@@ -3,277 +3,305 @@ require 'time'
|
|
3
3
|
require 'descriptive_statistics'
|
4
4
|
module DataOperations
|
5
5
|
class Aggregate
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
if aggregation_names.nil? || ! aggregation_names.is_a?(Array)
|
46
|
-
raise "Configuration error, aggregation_names must be specified and Array"
|
47
|
-
end
|
48
|
-
if group_field_names.nil? || ! aggregation_names.is_a?(Array)
|
49
|
-
raise "Configuration error, group_field_names must be specified and Array"
|
50
|
-
end
|
51
|
-
if aggregate_field_names.nil? || ! aggregation_names.is_a?(Array)
|
52
|
-
raise "Configuration error, aggregate_field_names must be specified and Array"
|
53
|
-
end
|
6
|
+
# strptime pattern applied to the time field of incoming records.
DEFAULT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%L%:z'.freeze
# Record key read for the event time; also the key of the formatted time in output rows.
DEFAULT_TIME_FIELD = 'timestamp'.freeze
# strftime pattern used for the time field of aggregated output rows.
DEFAULT_OUTPUT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%L%z'.freeze
# Aggregation window sizes, in seconds.
DEFAULT_INTERVALS = [10].freeze
DEFAULT_FLUSH_INTERVAL = 5
# Default for the keep_interval keyword of #initialize, which referenced this
# constant without it being defined (NameError whenever keep_interval was omitted).
# NOTE(review): 0 (no extra waiting time) is an assumed neutral value - confirm the
# intended default.
DEFAULT_KEEP_INTERVAL = 0
DEFAULT_PROCESSING_MODE = :batch
# Value stored for a group field that is missing from a record.
DEFAULT_FIELD_NO_DATA_VALUE = 'no_data'.freeze
DEFAULT_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
# Operation names accepted in aggregation_names.
VALID_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
DEFAULT_HASH_TIME_FORMAT = '%Y-%m-%dT%H'.freeze
DEFAULT_INTERVAL_SECONDS = 3600
# Misspelled original name, kept so existing references keep working.
DEFAULT_INERVAL_SECONDS = DEFAULT_INTERVAL_SECONDS
|
17
|
+
|
18
|
+
# Builds an aggregator: stores the configuration and validates it.
#
# Keyword arguments:
#   aggregator:            Hash used as the in-memory store of grouped events.
#   time_format:           strptime pattern for the time field of incoming records.
#   time_field:            record key holding the event time; also the key of the
#                          formatted time in aggregated output rows.
#   output_time_format:    strftime pattern for the time field of output rows.
#   intervals:             Array of window sizes in seconds. Duplicates are dropped,
#                          the list is sorted, and every size must be a multiple of
#                          the smallest one.
#   flush_interval:        stored as-is (not read by the methods visible in this class).
#   keep_interval:         seconds added to a window's deadline when processing_mode
#                          is not :batch.
#                          NOTE(review): DEFAULT_KEEP_INTERVAL is not among the
#                          constants listed next to this method - confirm it is
#                          defined, otherwise omitting keep_interval raises NameError.
#   field_no_data_value:   value stored for a group field missing from a record.
#   processing_mode:       :batch makes every pending window eligible immediately;
#                          any other value waits for the window deadline.
#   aggregator_name:       when set, emitted as 'aggregator_id' in every output row.
#   log:                   logger object.
#   aggregation_names:     Array of operation names, each one of VALID_AGGREGATIONS.
#   group_field_names:     Array of record keys that define a group.
#   aggregate_field_names: Array of record keys whose values are aggregated.
#
# Raises RuntimeError when one of the three required lists is not an Array, when an
# aggregation name is not in VALID_AGGREGATIONS, or when an interval is not a
# multiple of the smallest interval.
def initialize(aggregator: {},
               time_format: DEFAULT_TIME_FORMAT,
               time_field: DEFAULT_TIME_FIELD,
               output_time_format: DEFAULT_OUTPUT_TIME_FORMAT,
               intervals: DEFAULT_INTERVALS,
               flush_interval: DEFAULT_FLUSH_INTERVAL,
               keep_interval: DEFAULT_KEEP_INTERVAL,
               field_no_data_value: DEFAULT_FIELD_NO_DATA_VALUE,
               processing_mode: DEFAULT_PROCESSING_MODE,
               aggregator_name: nil,
               log: Logger.new(STDOUT),
               aggregation_names:,
               group_field_names:,
               aggregate_field_names:)
  @aggregator = aggregator
  @time_format = time_format
  @time_field = time_field
  @output_time_format = output_time_format
  # uniq returns a copy, so the caller's (possibly frozen) array is never modified.
  @intervals = intervals.uniq.sort
  @flush_interval = flush_interval
  @keep_interval = keep_interval
  @field_no_data_value = field_no_data_value
  @processing_mode = processing_mode
  @aggregator_name = aggregator_name

  # Every list is checked against its own value. The group/aggregate checks used to
  # test aggregation_names by mistake, so a non-Array value slipped through.
  { 'aggregation_names' => aggregation_names,
    'group_field_names' => group_field_names,
    'aggregate_field_names' => aggregate_field_names }.each do |option, value|
    raise "Configuration error, #{option} must be specified and Array" unless value.is_a?(Array)
  end

  @log = log

  @hash_time_format = DEFAULT_HASH_TIME_FORMAT
  @interval_seconds = DEFAULT_INERVAL_SECONDS

  @aggregation_names = aggregation_names
  @group_field_names = group_field_names
  @aggregate_field_names = aggregate_field_names

  @aggregation_names.each do |operation|
    next if VALID_AGGREGATIONS.include?(operation)

    raise 'aggregations must set any combination of sum,min,max,mean,median,variance,standard_deviation'
  end

  # Larger windows are built from results of the smallest one, so each of them has
  # to cover a whole number of smallest windows.
  base_interval = @intervals[0]
  @intervals.each do |interval|
    next if (interval % base_interval).zero?

    raise "interval: #{interval} must be multiple of first interval: #{base_interval}"
  end
end
|
80
|
+
|
81
|
+
# Forwards the given level to the `level=` writer of the logger held in @log.
def log_level(log_level)
  logger = @log
  logger.level = log_level
end
|
84
|
+
|
85
|
+
# Adds one record to the in-memory store.
#
# The record is filed under a group key built from its group fields and, inside
# that group, under the start (epoch seconds) of the smallest window containing
# its timestamp. Each aggregate field contributes one value to that window:
# the value itself when it is an Integer or Float, 0 otherwise.
#
# record - Hash-like object (must respond to key? and []).
#
# When the time field is missing, or its value cannot be parsed with
# @time_format, the current time is used instead. The old condition
# `!(timestamp = ...to_i)` could never be true and an unparsable value raised
# out of this method, so the intended fallback is now implemented explicitly.
def add_events(record)
  timestamp = nil
  if record.key?(@time_field)
    begin
      timestamp = DateTime.strptime(record[@time_field], @time_format).to_time.to_i
    rescue ArgumentError, TypeError => e
      # strptime raises ArgumentError for text that does not match the pattern and
      # TypeError for non-String values.
      @log.warn("Unparsable #{@time_field} #{record[@time_field].inspect}, using current time: #{e.message}") if @log
    end
  end
  timestamp ||= Time.now.to_i

  # Start of the smallest window that contains the event.
  window_start = (timestamp / @intervals[0]) * @intervals[0]

  # "field:value" pairs joined by "_"; stays nil when there are no group fields.
  key_parts = @group_field_names.map { |field_name| "#{field_name}:#{record[field_name]}" }
  hash_group_key = key_parts.empty? ? nil : key_parts.join('_')

  unless @aggregator.key?(hash_group_key)
    group_detail = {}
    @group_field_names.each do |field_name|
      group_detail[field_name] = record.key?(field_name) ? record[field_name] : @field_no_data_value
    end

    # One empty slot per configured interval, keyed by the interval as a String.
    interval_detail = {}
    @intervals.each { |interval| interval_detail[interval.to_s] = {} }

    @aggregator[hash_group_key] = {
      'group_fields' => group_detail,
      'aggregate_fields' => {},
      'intervals' => interval_detail
    }
  end
  windows = @aggregator[hash_group_key]['aggregate_fields']

  # :time_started records when the window was first seen (ingest time), not event time.
  windows[window_start] = { time_started: Time.now.to_i, 'processed' => 0 } unless windows.key?(window_start)
  window = windows[window_start]
  window['processed'] += 1

  @aggregate_field_names.each do |field_name|
    value = record[field_name]
    window[field_name] = [] unless window.key?(field_name)
    window[field_name] << (value.is_a?(Integer) || value.is_a?(Float) ? value : 0)
  end
end
|
144
|
+
|
145
|
+
# Returns the in-memory store itself (the object held in @aggregator, not a copy).
def aggregate_data
  return @aggregator
end
|
129
148
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
149
|
+
# Runs the aggregation over every group held in @aggregator.
#
# For each group the smallest interval is aggregated first, then every other
# interval (all keys of the group's 'intervals' hash except the first one).
#
# Returns a Hash mapping interval name to an Array of output rows, or nil when
# nothing was produced.
def aggregate_events
  results = {}
  now = Time.now.to_i

  @aggregator.each_value do |group|
    aggregate_first_interval(results, now, group)

    # Larger intervals are fed by the first one, so they are processed after it.
    group['intervals'].keys[1..-1].each do |interval_name|
      aggregate_subsequents_intervals(results, now, group, interval_name)
    end
  end

  results.empty? ? nil : results
end
|
169
|
+
|
170
|
+
private
|
171
|
+
|
172
|
+
# Emits one output row per pending window of the smallest interval of a group.
#
# aggregate_data   - Hash of interval name => Array of rows; rows are appended
#                    under the group's first interval name.
# current_time     - epoch seconds used to decide whether a window is due.
# group_item_value - one group entry of the store ('group_fields',
#                    'aggregate_fields', 'intervals').
#
# A window is due immediately in :batch mode; otherwise once current_time reaches
# its :time_started plus the smallest interval plus @keep_interval. Due windows are
# removed from the group's 'aggregate_fields' after being emitted.
def aggregate_first_interval(aggregate_data, current_time, group_item_value)
  pending = group_item_value['aggregate_fields']
  interval_names = group_item_value['intervals'].keys
  first_interval = interval_names[0]
  larger_intervals = interval_names[1..-1]

  # Walk a snapshot, because emitted windows are deleted from `pending` on the way.
  pending.to_a.each do |window_start, window|
    limit_time = @processing_mode == :batch ? 0 : window[:time_started] + @intervals[0] + @keep_interval
    next if current_time < limit_time

    row = { @time_field => Time.at(window_start).strftime(@output_time_format) }
    row.merge!(group_item_value['group_fields'])
    row['time'] = window_start
    row['processed'] = window['processed']
    row['aggregator_id'] = @aggregator_name if @aggregator_name

    # Make sure every larger interval has an accumulator entry for this window.
    larger_intervals.each do |interval_secs|
      create_aggregation_hash(window_start, window, group_item_value, interval_secs)
    end

    # Every entry of the window is offered; execute_aggregation decides what to do
    # with entries such as :time_started and 'processed'.
    window.each do |field_key, field_value|
      execute_aggregation(field_key, field_value, row, window_start, group_item_value)
    end

    pending.delete(window_start)
    aggregate_data[first_interval] = [] if aggregate_data[first_interval].nil?
    aggregate_data[first_interval] << row
  end

  pending
end
|
206
|
+
|
207
|
+
# Computes every configured operation for one entry of a smallest-interval window.
#
# aggregate_field_key   - key of the entry inside the window.
# aggregate_field_value - the entry; only Arrays are aggregated, anything else
#                         (e.g. the window's counters) is skipped.
# aggregator_data       - output row; receives "<key>_<operation>" => result.
# aggregator_item_key   - start of the window (epoch seconds).
# group_item_value      - the group entry that owns the window.
#
# Each result is also appended to the matching accumulator of every larger
# interval, so those intervals can be aggregated later from these results.
def execute_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
  # Prepare the per-operation accumulators of the larger intervals first.
  create_metadata_aggregation(aggregate_field_key,
                              aggregate_field_value,
                              aggregator_data,
                              aggregator_item_key,
                              group_item_value)
  return unless aggregate_field_value.is_a?(Array)

  larger_intervals = group_item_value['intervals'].keys[1..-1]

  @aggregation_names.each do |operation|
    result = aggregate_field_value.public_send(operation)
    aggregator_data["#{aggregate_field_key}_#{operation}"] = result

    larger_intervals.each do |interval_secs|
      seconds = interval_secs.to_i
      # Start of the larger window that contains this smallest window.
      parent_start = (aggregator_item_key / seconds) * seconds
      parent = group_item_value['intervals'][interval_secs][parent_start]
      parent['aggregate_fields'][aggregate_field_key][operation] << result
    end
  end
end
|
138
229
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
230
|
+
# Prepares, in every larger interval, the accumulators for one aggregate field.
#
# For each interval after the first, the window containing aggregator_item_key is
# looked up; if it has no entry for aggregate_field_key yet and the value is an
# Array, an entry with one empty Array per configured operation is created.
# aggregator_data is accepted but not used here.
def create_metadata_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
  group_item_value['intervals'].keys[1..-1].each do |interval_secs|
    seconds = interval_secs.to_i
    parent_start = (aggregator_item_key / seconds) * seconds
    fields = group_item_value['intervals'][interval_secs][parent_start]['aggregate_fields']

    next if fields.key?(aggregate_field_key) || !aggregate_field_value.is_a?(Array)

    accumulators = {}
    fields[aggregate_field_key] = accumulators
    @aggregation_names.each { |operation| accumulators[operation] = [] }
  end
end
|
244
|
+
|
245
|
+
# Registers a smallest-interval window in the larger window that contains it.
#
# aggregator_item_key   - start of the smallest-interval window (epoch seconds).
# aggregator_item_value - that window; its :time_started and 'processed' are read.
# group_item_value      - the group entry owning both windows.
# interval_secs         - name of the larger interval (its size in seconds as text).
#
# If the larger window already exists, its 'processed' counter is increased and its
# :time_started is moved forward to the newest value seen; otherwise it is created
# with an empty 'aggregate_fields' hash.
def create_aggregation_hash(aggregator_item_key, aggregator_item_value, group_item_value, interval_secs)
  seconds = interval_secs.to_i
  parent_start = (aggregator_item_key / seconds) * seconds
  slots = group_item_value['intervals'][interval_secs]
  parent = slots[parent_start]

  if parent
    parent[:time_started] = [parent[:time_started], aggregator_item_value[:time_started]].max
    parent['processed'] += aggregator_item_value['processed']
  else
    slots[parent_start] = {
      time_started: aggregator_item_value[:time_started],
      'aggregate_fields' => {},
      'processed' => aggregator_item_value['processed']
    }
  end
end
|
152
264
|
|
153
|
-
|
154
|
-
|
265
|
+
# Hands every pending window of one larger interval of a group to
# acumulative_aggregation.
#
# s_interval is the interval name, i.e. a key of the group's 'intervals' hash.
def aggregate_subsequents_intervals(aggregate_data, current_time, group_item_value, s_interval)
  windows = group_item_value['intervals'][s_interval]
  windows.each_pair do |window_start, window|
    acumulative_aggregation(aggregate_data, window_start, window, current_time, group_item_value, s_interval)
  end
end
|
270
|
+
|
271
|
+
# Emits the output row of one window of a larger interval, built from the results
# that the smallest interval accumulated for it.
#
# aggregate_data        - Hash of interval name => Array of rows; the row is
#                         appended under s_interval.
# aggregator_item_key   - start of the window (epoch seconds).
# aggregator_item_value - the window: :time_started, 'processed' and
#                         'aggregate_fields' (field => operation => Array of results).
# current_time          - epoch seconds used to decide whether the window is due.
# group_item_value      - the group entry owning the window.
# s_interval            - interval name (its size in seconds as text).
#
# The window is due immediately in :batch mode; otherwise once current_time reaches
# :time_started plus the interval plus @keep_interval. Returns nil when it is not
# due. A due window is removed from the group after being emitted.
#
# How accumulated results are combined:
#   sum                    -> sum of the accumulated sums (previously this fell into
#                             the fallback and reported the median of the sums)
#   max, min, mean, median -> the same operation applied to the accumulated results
#   anything else          -> median of the accumulated results
# NOTE(review): mean/median of accumulated results and the median fallback are
# approximations of the statistic over the raw values - confirm this is intended.
def acumulative_aggregation(aggregate_data, aggregator_item_key, aggregator_item_value, current_time, group_item_value, s_interval)
  limit_time = @processing_mode == :batch ? 0 : aggregator_item_value[:time_started] + s_interval.to_i + @keep_interval
  return if current_time < limit_time

  row = { @time_field => Time.at(aggregator_item_key).strftime(@output_time_format) }
  row.merge!(group_item_value['group_fields'])
  row['time'] = aggregator_item_key
  row['processed'] = aggregator_item_value['processed']
  row['aggregator_id'] = @aggregator_name if @aggregator_name

  aggregator_item_value['aggregate_fields'].each do |field_name, field_data|
    field_data.each do |operation, vector|
      row["#{field_name}_#{operation}"] =
        case operation
        when 'sum' then vector.sum
        when 'max', 'min', 'mean', 'median' then vector.public_send(operation)
        else vector.median
        end
    end
  end

  group_item_value['intervals'][s_interval].delete(aggregator_item_key)
  aggregate_data[s_interval] = [] if aggregate_data[s_interval].nil?
  aggregate_data[s_interval] << row
end
|
278
306
|
end
|
279
307
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dataoperations-aggregate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Guillen
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: descriptive_statistics
|