dataoperations-aggregate 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/dataoperations-aggregate.rb +280 -252
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: af4af69996bbbaf33b749eded827f3d6f695e44bfcd1be0bda98ed8e027cd0f1
+  data.tar.gz: 94d67f60c8ff46462c84a9d0a186c6d240d99ca533d4db624cef97895efc5050
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a4cfe7be327d99b497d134d3ea3fad255aa46a8bf5b0153f96e5d2cd49bcc41ff444ada6b0cc08d2c0f2a398ff90f447f1959f7945dc3e0b8f506c1a90321c09
+  data.tar.gz: 754c6c3920f28c29d9c03dbd7a1c5a00fbd71f560f72e8e000dca1b7eb68283c61ca85e6bfde1a689d68dd1d2953d593549aa0ec72dc37573feddd3320ffce1b
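These checksums are the SHA-256 and SHA-512 digests of the two archives packed inside the published .gem file (metadata.gz and data.tar.gz); the digests of the 0.0.2 archives were truncated in this view. As a minimal sketch of how the new digests could be recomputed locally, assuming the gem has already been fetched and unpacked (for example with `gem fetch dataoperations-aggregate -v 0.0.3` followed by `tar -xf dataoperations-aggregate-0.0.3.gem`; the file paths are assumptions, not part of this diff):

    require 'digest'

    # metadata.gz and data.tar.gz sit at the top level of the unpacked .gem archive
    %w[metadata.gz data.tar.gz].each do |name|
      bytes = File.binread(name)
      puts "#{name} SHA256: #{Digest::SHA256.hexdigest(bytes)}"
      puts "#{name} SHA512: #{Digest::SHA512.hexdigest(bytes)}"
    end

For a faithful download, the printed values should match the `+` lines above.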
data/lib/dataoperations-aggregate.rb
CHANGED
@@ -3,277 +3,305 @@ require 'time'
 require 'descriptive_statistics'
 module DataOperations
   class Aggregate
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      if aggregation_names.nil? || ! aggregation_names.is_a?(Array)
-        raise "Configuration error, aggregation_names must be specified and Array"
-      end
-      if group_field_names.nil? || ! aggregation_names.is_a?(Array)
-        raise "Configuration error, group_field_names must be specified and Array"
-      end
-      if aggregate_field_names.nil? || ! aggregation_names.is_a?(Array)
-        raise "Configuration error, aggregate_field_names must be specified and Array"
-      end
+    DEFAULT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%L%:z'.freeze
+    DEFAULT_TIME_FIELD = 'timestamp'.freeze
+    DEFAULT_OUTPUT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%L%z'.freeze
+    DEFAULT_INTERVALS = [10].freeze
+    DEFAULT_FLUSH_INTERVAL = 5
+    DEFAULT_PROCESSING_MODE = :batch
+    DEFAULT_FIELD_NO_DATA_VALUE = 'no_data'.freeze
+    DEFAULT_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
+    VALID_AGGREGATIONS = %w[sum min max mean median variance standard_deviation].freeze
+    DEFAULT_HASH_TIME_FORMAT = '%Y-%m-%dT%H'.freeze
+    DEFAULT_INERVAL_SECONDS = 3600
+
+    def initialize(aggregator: {},
+                   time_format: DEFAULT_TIME_FORMAT,
+                   time_field: DEFAULT_TIME_FIELD,
+                   output_time_format: DEFAULT_OUTPUT_TIME_FORMAT,
+                   intervals: DEFAULT_INTERVALS,
+                   flush_interval: DEFAULT_FLUSH_INTERVAL,
+                   keep_interval: DEFAULT_KEEP_INTERVAL,
+                   field_no_data_value: DEFAULT_FIELD_NO_DATA_VALUE,
+                   processing_mode: DEFAULT_PROCESSING_MODE,
+                   aggregator_name: nil,
+                   log: Logger.new(STDOUT),
+                   aggregation_names:,
+                   group_field_names:,
+                   aggregate_field_names:
+                  )
+      @aggregator = aggregator
+      @time_format = time_format
+      @time_field = time_field
+      @output_time_format = output_time_format
+      @intervals = intervals.uniq.sort!
+      @flush_interval = flush_interval
+      @keep_interval = keep_interval
+      @field_no_data_value = field_no_data_value
+      @processing_mode = processing_mode
+      @aggregator_name = aggregator_name
 
-      @log = log
 
-
-
+      if aggregation_names.nil? || !aggregation_names.is_a?(Array)
+        raise 'Configuration error, aggregation_names must be specified and Array'
+      end
+      if group_field_names.nil? || !aggregation_names.is_a?(Array)
+        raise 'Configuration error, group_field_names must be specified and Array'
+      end
+      if aggregate_field_names.nil? || !aggregation_names.is_a?(Array)
+        raise 'Configuration error, aggregate_field_names must be specified and Array'
+      end
 
-
-      @group_field_names = group_field_names
-      @aggregate_field_names = aggregate_field_names
+      @log = log
 
-
-
-          raise "aggregations must set any combination of sum,min,max,mean,median,variance,standard_deviation"
-        end
-      }
-      @intervals.each {|interval|
-        if ! (interval % @intervals[0] == 0)
-          raise "interval: #{interval} must be multiple of first interval: #{@intervals[0]}"
-        end
-      }
+      @hash_time_format = DEFAULT_HASH_TIME_FORMAT
+      @interval_seconds = DEFAULT_INERVAL_SECONDS
 
-
-
-
-
+      @aggregation_names = aggregation_names
+      @group_field_names = group_field_names
+      @aggregate_field_names = aggregate_field_names
+
+      @aggregation_names.each do |operation|
+        unless VALID_AGGREGATIONS.include?(operation)
+          raise 'aggregations must set any combination of sum,min,max,mean,median,variance,standard_deviation'
+        end
+      end
+      @intervals.each do |interval|
+        unless (interval % @intervals[0]).zero?
+          raise "interval: #{interval} must be multiple of first interval: #{@intervals[0]}"
+        end
       end
 
-
-
+      # TODO:
+      # - Duplicate intervals - Done
+      # - Sort intervals - Done
+      # - Validate aggregation_names, group_field_names, aggregate_field_names
+    end
+
+    def log_level(log_level)
+      @log.level = log_level
+    end
+
+    def add_events(record)
+      timestamp = nil
+      if !record.key?(@time_field) || !(timestamp = DateTime.strptime(record[@time_field], @time_format).to_time.to_i)
+        timestamp = DateTime.now.to_time.to_i
       end
 
-
-
-      if ! record.has_key?(@time_field) || ! (timestamp = DateTime.strptime(record[@time_field],@time_format).to_time.to_i)
-        timestamp = DateTime.now.to_time.to_i
-      end
+      current_interval_seconds = (timestamp / @intervals[0]) * @intervals[0]
+      aggregator_hash_key = current_interval_seconds
 
-
-
+      hash_group_key = nil
+      @group_field_names.each do |field_name|
+        hash_group_key = !hash_group_key.nil? ? "#{hash_group_key}_#{field_name}:#{record[field_name]}" : "#{field_name}:#{record[field_name]}"
+      end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      aggregator_item = {}
+      if @aggregator.key?(hash_group_key)
+        aggregator_item = @aggregator[hash_group_key]
+      else
+        group_detail = {}
+        aggregate_detail = {}
+        interval_detail = {}
+        @group_field_names.each do |field_name|
+          group_detail[field_name] = record.key?(field_name) ? record[field_name] : @field_no_data_value
+        end
+
+        # Add interval empty data
+        @intervals.each do |interval|
+          interval_detail[interval.to_s] = {}
+        end
+
+        aggregator_item['group_fields'] = group_detail
+        aggregator_item['aggregate_fields'] = aggregate_detail
+        aggregator_item['intervals'] = interval_detail
+
+        @aggregator[hash_group_key] = aggregator_item
+      end
+
+      if !aggregator_item['aggregate_fields'].key?(aggregator_hash_key)
+        hash_aggregator = {}
+        hash_aggregator[:time_started] = Time.now.to_i
+        hash_aggregator['processed'] = 1
+        aggregator_item['aggregate_fields'][aggregator_hash_key] = hash_aggregator
+      else
+        aggregator_item['aggregate_fields'][aggregator_hash_key]['processed'] += 1
+      end
+
+      @aggregate_field_names.each do |field_name|
+        aggregate_values = []
+        if aggregator_item['aggregate_fields'][aggregator_hash_key].key?(field_name)
+          aggregate_values = aggregator_item['aggregate_fields'][aggregator_hash_key][field_name]
+        end
+        if record[field_name].is_a?(Integer) || record[field_name].is_a?(Float)
+          aggregate_values << record[field_name]
+        else
+          aggregate_values << 0
+        end
+        aggregator_item['aggregate_fields'][aggregator_hash_key][field_name] = aggregate_values
+      end
+    end
+
+    def aggregate_data
+      @aggregator
+    end
 
-
-
-
-
-
-
-
+    def aggregate_events
+      aggregate_data = {}
+
+      # @log.debug @aggregator
+      # @aggregator_mutex.synchronize do
+      current_time = Time.now.to_i
+      @aggregator.each do |group_item_key, group_item_value|
+        aggregate_first_interval(aggregate_data, current_time, group_item_value)
+
+        # Calculate subsecuents aggregations
+        group_item_value['intervals'].keys[1..-1].each do |s_interval|
+          aggregate_subsequents_intervals(aggregate_data, current_time, group_item_value, s_interval)
+        end
+      end
+
+      # @log.debug aggregate_data
+      aggregate_data unless aggregate_data.empty?
+    # rescue Exception => e
+    #   $log.error e
+    end
+
+    private
+
+    def aggregate_first_interval(aggregate_data, current_time, group_item_value)
+      group_item_value['aggregate_fields'].each do |aggregator_item_key, aggregator_item_value|
+        # If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
+        @processing_mode == :batch ? limit_time = 0 : limit_time = aggregator_item_value[:time_started] + @intervals[0] + @keep_interval
+
+        # Is this data ready to aggregate (based on the ingest time), if @processing_mode is batch limit_time is 0
+        next unless current_time >= limit_time
+
+        aggregator_data = {}
+        aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
+        aggregator_data.merge!(group_item_value['group_fields'])
+
+        aggregator_data['time'] = aggregator_item_key
+        aggregator_data['processed'] = aggregator_item_value['processed']
+        if @aggregator_name
+          aggregator_data['aggregator_id'] = @aggregator_name
+        end
+
+        # Add entry in accumulative aggregation hash
+        group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+          create_aggregation_hash(aggregator_item_key, aggregator_item_value, group_item_value, interval_secs)
+        end
+
+        aggregator_item_value.each do |aggregate_field_key, aggregate_field_value|
+          execute_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+        end
+
+        group_item_value['aggregate_fields'].delete(aggregator_item_key)
+        if aggregate_data[group_item_value['intervals'].keys[0]].nil?
+          aggregate_data[group_item_value['intervals'].keys[0]] = []
+        end
+        aggregate_data[group_item_value['intervals'].keys[0]] << aggregator_data
+      end
+    end
+
+    def execute_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+      # Create field metadata for subsecuents aggregations
+      create_metadata_aggregation(aggregate_field_key,
+                                  aggregate_field_value,
+                                  aggregator_data,
+                                  aggregator_item_key,
+                                  group_item_value)
+      # Aggregate data
+      if aggregate_field_value.is_a?(Array)
+        @aggregation_names.each do |operation|
+          data = aggregate_field_value.method(operation).call
+          aggregator_data["#{aggregate_field_key}_#{operation}"] = data
+
+          # Add aggregated data to interval
+          group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+            interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+            interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+            interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] << data
          end
+        end
+      end
+    end
 
-
-
-
-
-
-
-
-
-
-
-
-
+    def create_metadata_aggregation(aggregate_field_key, aggregate_field_value, aggregator_data, aggregator_item_key, group_item_value)
+      group_item_value['intervals'].keys[1..-1].each do |interval_secs|
+        interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+        interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+
+        # @log.debug interval_aggregator_item_value
+        next unless !interval_aggregator_item_value['aggregate_fields'].key?(aggregate_field_key) && aggregate_field_value.is_a?(Array)
+
+        interval_aggregator_item_value['aggregate_fields'][aggregate_field_key] = {}
+        @aggregation_names.each do |operation|
+          interval_aggregator_item_value['aggregate_fields'][aggregate_field_key][operation] = []
+        end
+      end
+    end
+
+    def create_aggregation_hash(aggregator_item_key, aggregator_item_value, group_item_value, interval_secs)
+      interval_aggregator_item_key = (aggregator_item_key / interval_secs.to_i) * interval_secs.to_i
+      # @log.debug "interval_aggregator_item_key: #{interval_aggregator_item_key}"
+
+      if interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
+        if interval_aggregator_item_value[:time_started] < aggregator_item_value[:time_started]
+          interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
+        end
+        interval_aggregator_item_value['processed'] += aggregator_item_value['processed']
+        # @log.debug interval_aggregator_item_value
+      else
+        interval_aggregator_item_value = {}
+        interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
+        interval_aggregator_item_value['aggregate_fields'] = {}
+        interval_aggregator_item_value['processed'] = aggregator_item_value['processed']
+        group_item_value['intervals'][interval_secs][interval_aggregator_item_key] = interval_aggregator_item_value
+        # @log.debug interval_aggregator_item_value
      end
+    end
 
-
-
+    def aggregate_subsequents_intervals(aggregate_data, current_time, group_item_value, s_interval)
+      group_item_value['intervals'][s_interval].each do |aggregator_item_key, aggregator_item_value|
+        acumulative_aggregation(aggregate_data, aggregator_item_key, aggregator_item_value, current_time, group_item_value, s_interval)
      end
+    end
+
+    def acumulative_aggregation(aggregate_data, aggregator_item_key, aggregator_item_value, current_time, group_item_value, s_interval)
+      interval = s_interval.to_i
+      # If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
+      limit_time = @processing_mode == :batch ? 0 : aggregator_item_value[:time_started] + interval + @keep_interval
+
+      # @log.debug "processing_mode:#{@processing_mode} limit_time:#{limit_time}"
+
+      unless current_time < limit_time
+        aggregator_data = {}
+        aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
+        aggregator_data.merge!(group_item_value['group_fields'])
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
-            aggregator_data.merge!(group_item_value["group_fields"])
-
-            aggregator_data["time"] = aggregator_item_key
-            aggregator_data["processed"] = aggregator_item_value["processed"]
-            aggregator_data["aggregator_id"] = @aggregator_name if @aggregator_name
-
-            #Add entry in accumulative aggregation hash
-            group_item_value['intervals'].keys[1..-1].each{|interval_secs|
-              interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
-              #@log.debug "interval_aggregator_item_key: #{interval_aggregator_item_key}"
-
-              if interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
-                interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started] if interval_aggregator_item_value[:time_started] < aggregator_item_value[:time_started]
-                interval_aggregator_item_value["processed"] += aggregator_item_value["processed"]
-                #@log.debug interval_aggregator_item_value
-              else
-                interval_aggregator_item_value = {}
-                interval_aggregator_item_value[:time_started] = aggregator_item_value[:time_started]
-                interval_aggregator_item_value["aggregate_fields"]={}
-                interval_aggregator_item_value["processed"] = aggregator_item_value["processed"]
-                group_item_value['intervals'][interval_secs][interval_aggregator_item_key] = interval_aggregator_item_value
-                #@log.debug interval_aggregator_item_value
-              end
-            }
-
-            aggregator_item_value.each { |aggregate_field_key,aggregate_field_value|
-              #Create field metadata for subsecuents aggregations
-              group_item_value['intervals'].keys[1..-1].each{|interval_secs|
-                interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
-                interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
-                #@log.debug interval_aggregator_item_value
-                if ! interval_aggregator_item_value["aggregate_fields"].has_key?(aggregate_field_key) && aggregate_field_value.is_a?(Array)
-                  interval_aggregator_item_value["aggregate_fields"][aggregate_field_key]={}
-                  @aggregation_names.each {|operation|
-                    interval_aggregator_item_value["aggregate_fields"][aggregate_field_key][operation]=[]
-                  }
-                end
-              }
-
-              #Aggregate data
-              if aggregate_field_value.is_a?(Array)
-                @aggregation_names.each {|operation|
-                  data = aggregate_field_value.method(operation).call
-                  aggregator_data["#{aggregate_field_key}_#{operation}"] = data
-
-                  #Add aggregated data to interval
-                  group_item_value['intervals'].keys[1..-1].each{|interval_secs|
-                    interval_aggregator_item_key=(aggregator_item_key/interval_secs.to_i)*interval_secs.to_i
-                    interval_aggregator_item_value = group_item_value['intervals'][interval_secs][interval_aggregator_item_key]
-                    interval_aggregator_item_value["aggregate_fields"][aggregate_field_key][operation] << data
-                  }
-                }
-              end
-            }
-
-            group_item_value["aggregate_fields"].delete(aggregator_item_key)
-            aggregate_data[group_item_value['intervals'].keys[0]] =[] if aggregate_data[group_item_value['intervals'].keys[0]].nil?
-            aggregate_data[group_item_value['intervals'].keys[0]] << aggregator_data
-          end
-        }
-
-        #Calculate subsecuents aggregations
-        group_item_value["intervals"].keys[1..-1].each {|s_interval|
-          group_item_value["intervals"][s_interval].each{|aggregator_item_key,aggregator_item_value|
-            interval = s_interval.to_i
-            #If processing mode is :batch, aggregate immediatly, else wait to arrive events (streaming processing like fluentd)
-            @processing_mode == :batch ? limit_time = 0 : limit_time = aggregator_item_value[:time_started] + interval + @keep_interval
-
-            #@log.debug "processing_mode:#{@processing_mode} limit_time:#{limit_time}"
-
-            if current_time >= limit_time
-              aggregator_data = {}
-              aggregator_data[@time_field] = Time.at(aggregator_item_key).strftime(@output_time_format)
-              aggregator_data.merge!(group_item_value["group_fields"])
-
-              aggregator_data["time"] = aggregator_item_key
-              aggregator_data["processed"] = aggregator_item_value["processed"]
-              aggregator_data["aggregator_id"] = @aggregator_name if @aggregator_name
-              aggregator_item_value["aggregate_fields"].each{|field_name,field_data|
-                field_data.each{|operation,vector|
-                  case operation
-                  when 'max','min','mean','median'
-                    data = vector.method(operation).call
-                  else
-                    data = vector.median
-                  end
-                  aggregator_data["#{field_name}_#{operation}"] = data
-                }
-              }
-              #@log.debug aggregator_item_value
-              #@log.debug aggregator_data
-              group_item_value["intervals"][s_interval].delete(aggregator_item_key)
-              aggregate_data[s_interval] =[] if aggregate_data[s_interval].nil?
-              aggregate_data[s_interval] << aggregator_data
-            end
-          }
-        }
-      }
-
-      #@log.debug aggregate_data
-      unless aggregate_data.empty?
-        aggregate_data
+        aggregator_data['time'] = aggregator_item_key
+        aggregator_data['processed'] = aggregator_item_value['processed']
+        if @aggregator_name
+          aggregator_data['aggregator_id'] = @aggregator_name
+        end
+        aggregator_item_value['aggregate_fields'].each do |field_name, field_data|
+          field_data.each do |operation, vector|
+            case operation
+            when 'max', 'min', 'mean', 'median'
+              data = vector.method(operation).call
+            else
+              data = vector.median
+            end
+            aggregator_data["#{field_name}_#{operation}"] = data
          end
-
-
+        end
+        # @log.debug aggregator_item_value
+        # @log.debug aggregator_data
+        group_item_value['intervals'][s_interval].delete(aggregator_item_key)
+        aggregate_data[s_interval] = [] if aggregate_data[s_interval].nil?
+        aggregate_data[s_interval] << aggregator_data
      end
+    end
  end
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dataoperations-aggregate
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Victor Guillen
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-04-
+date: 2020-04-26 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: descriptive_statistics