logstash-integration-aws 7.1.1-java
- checksums.yaml +7 -0
- data/CHANGELOG.PRE.MERGE.md +658 -0
- data/CHANGELOG.md +33 -0
- data/CONTRIBUTORS +40 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +5 -0
- data/README.md +205 -0
- data/VERSION +1 -0
- data/docs/codec-cloudfront.asciidoc +53 -0
- data/docs/codec-cloudtrail.asciidoc +45 -0
- data/docs/index.asciidoc +36 -0
- data/docs/input-cloudwatch.asciidoc +320 -0
- data/docs/input-s3.asciidoc +346 -0
- data/docs/input-sqs.asciidoc +287 -0
- data/docs/output-cloudwatch.asciidoc +321 -0
- data/docs/output-s3.asciidoc +442 -0
- data/docs/output-sns.asciidoc +166 -0
- data/docs/output-sqs.asciidoc +242 -0
- data/lib/logstash/codecs/cloudfront.rb +84 -0
- data/lib/logstash/codecs/cloudtrail.rb +47 -0
- data/lib/logstash/inputs/cloudwatch.rb +338 -0
- data/lib/logstash/inputs/s3.rb +466 -0
- data/lib/logstash/inputs/sqs.rb +196 -0
- data/lib/logstash/outputs/cloudwatch.rb +346 -0
- data/lib/logstash/outputs/s3/file_repository.rb +193 -0
- data/lib/logstash/outputs/s3/path_validator.rb +18 -0
- data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/temporary_file.rb +114 -0
- data/lib/logstash/outputs/s3/temporary_file_factory.rb +126 -0
- data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/uploader.rb +76 -0
- data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
- data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
- data/lib/logstash/outputs/s3.rb +442 -0
- data/lib/logstash/outputs/sns.rb +133 -0
- data/lib/logstash/outputs/sqs.rb +167 -0
- data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
- data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
- data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
- data/lib/logstash-integration-aws_jars.rb +4 -0
- data/lib/tasks/build.rake +15 -0
- data/logstash-integration-aws.gemspec +55 -0
- data/spec/codecs/cloudfront_spec.rb +92 -0
- data/spec/codecs/cloudtrail_spec.rb +56 -0
- data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
- data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
- data/spec/fixtures/cloudfront.log +4 -0
- data/spec/fixtures/compressed.log.gee.zip +0 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/json.log +2 -0
- data/spec/fixtures/json_with_message.log +2 -0
- data/spec/fixtures/multiline.log +6 -0
- data/spec/fixtures/multiple_compressed_streams.gz +0 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/inputs/cloudwatch_spec.rb +85 -0
- data/spec/inputs/s3_spec.rb +610 -0
- data/spec/inputs/sincedb_spec.rb +17 -0
- data/spec/inputs/sqs_spec.rb +324 -0
- data/spec/integration/cloudwatch_spec.rb +25 -0
- data/spec/integration/dynamic_prefix_spec.rb +92 -0
- data/spec/integration/gzip_file_spec.rb +62 -0
- data/spec/integration/gzip_size_rotation_spec.rb +63 -0
- data/spec/integration/outputs/sqs_spec.rb +98 -0
- data/spec/integration/restore_from_crash_spec.rb +133 -0
- data/spec/integration/s3_spec.rb +66 -0
- data/spec/integration/size_rotation_spec.rb +59 -0
- data/spec/integration/sqs_spec.rb +110 -0
- data/spec/integration/stress_test_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
- data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
- data/spec/outputs/cloudwatch_spec.rb +38 -0
- data/spec/outputs/s3/file_repository_spec.rb +143 -0
- data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
- data/spec/outputs/s3/temporary_file_spec.rb +47 -0
- data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/s3/uploader_spec.rb +69 -0
- data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
- data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
- data/spec/outputs/s3_spec.rb +232 -0
- data/spec/outputs/sns_spec.rb +160 -0
- data/spec/plugin_mixin/aws_config_spec.rb +217 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/support/helpers.rb +121 -0
- data/spec/unit/outputs/sqs_spec.rb +247 -0
- data/vendor/jar-dependencies/org/logstash/plugins/integration/aws/logstash-integration-aws/7.1.1/logstash-integration-aws-7.1.1.jar +0 -0
- metadata +472 -0
data/lib/logstash/outputs/cloudwatch.rb
@@ -0,0 +1,346 @@
# encoding: utf-8
require "logstash/outputs/base"
require "logstash/namespace"
require "logstash/plugin_mixins/aws_config"

require "rufus/scheduler"

# This output lets you aggregate and send metric data to AWS CloudWatch
#
# ==== Summary:
# This plugin is intended to be used on a logstash indexer agent (but that
# is not the only way, see below.) In the intended scenario, one cloudwatch
# output plugin is configured, on the logstash indexer node, with just AWS API
# credentials, and possibly a region and/or a namespace. The output looks
# for fields present in events, and when it finds them, it uses them to
# calculate aggregate statistics. If the `metricname` option is set in this
# output, then any events which pass through it will be aggregated & sent to
# CloudWatch, but that is not recommended. The intended use is to NOT set the
# metricname option here, and instead to add a `CW_metricname` field (and other
# fields) to only the events you want sent to CloudWatch.
#
# When events pass through this output they are queued for background
# aggregation and sending, which happens every minute by default. The
# queue has a maximum size, and when it is full aggregated statistics will be
# sent to CloudWatch ahead of schedule. Whenever this happens a warning
# message is written to logstash's log. If you see this you should increase
# the `queue_size` configuration option to avoid the extra API calls. The queue
# is emptied every time we send data to CloudWatch.
#
# Note: when logstash is stopped the queue is destroyed before it can be processed.
# This is a known limitation of logstash and will hopefully be addressed in a
# future version.
#
# ==== Details:
# There are two ways to configure this plugin, and they can be used in
# combination: event fields & per-output defaults
#
# Event Field configuration...
# You add fields to your events in inputs & filters and this output reads
# those fields to aggregate events. The names of the fields read are
# configurable via the `field_*` options.
#
# Per-output defaults...
# You set universal defaults in this output plugin's configuration, and
# if an event does not have a field for that option then the default is
# used.
#
# Notice, the event fields take precedence over the per-output defaults.
#
# At a minimum events must have a "metric name" to be sent to CloudWatch.
# This can be achieved either by providing a default here OR by adding a
# `CW_metricname` field. By default, if no other configuration is provided
# besides a metric name, then events will be counted (Unit: Count, Value: 1)
# by their metric name (either a default or from their `CW_metricname` field)
#
# Other fields which can be added to events to modify the behavior of this
# plugin are `CW_namespace`, `CW_unit`, `CW_value`, and
# `CW_dimensions`. All of these field names are configurable in
# this output. You can also set per-output defaults for any of them.
# See below for details.
#
# Read more about http://aws.amazon.com/cloudwatch/[AWS CloudWatch],
# and the specifics of the API endpoint this output uses,
# http://docs.amazonwebservices.com/AmazonCloudWatch/latest/APIReference/API_PutMetricData.html[PutMetricData]
class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base
  include LogStash::PluginMixins::AwsConfig::V2

  config_name "cloudwatch"

  # Constants
  # aggregate_key members
  DIMENSIONS = "dimensions"
  TIMESTAMP = "timestamp"
  METRIC = "metric"
  COUNT = "count"
  UNIT = "unit"
  SUM = "sum"
  MIN = "min"
  MAX = "max"
  # Units
  COUNT_UNIT = "Count"
  NONE = "None"

  # How often to send data to CloudWatch
  # This does not affect the event timestamps, events will always have their
  # actual timestamp (to-the-minute) sent to CloudWatch.
  #
  # We only call the API if there is data to send.
  #
  # See the Rufus Scheduler docs for an https://github.com/jmettraux/rufus-scheduler#the-time-strings-understood-by-rufus-scheduler[explanation of allowed values]
  config :timeframe, :validate => :string, :default => "1m"

  # How many events to queue before forcing a call to the CloudWatch API ahead of `timeframe` schedule
  # Set this to the number of events-per-timeframe you will be sending to CloudWatch to avoid extra API calls
  config :queue_size, :validate => :number, :default => 10000

  # How many data points can be given in one call to the CloudWatch API
  config :batch_size, :validate => :number, :default => 20

  # The default namespace to use for events which do not have a `CW_namespace` field
  config :namespace, :validate => :string, :default => "Logstash"

  # The name of the field used to set a different namespace per event
  # Note: Only one namespace can be sent to CloudWatch per API call
  # so setting different namespaces will increase the number of API calls
  # and those cost money.
  config :field_namespace, :validate => :string, :default => "CW_namespace"

  # The default metric name to use for events which do not have a `CW_metricname` field.
  # Beware: If this is provided then all events which pass through this output will be aggregated and
  # sent to CloudWatch, so use this carefully. Furthermore, when providing this option, you
  # will probably want to also restrict events from passing through this output using event
  # type, tag, and field matching
  config :metricname, :validate => :string

  # The name of the field used to set the metric name on an event
  # The author of this plugin recommends adding this field to events in inputs &
  # filters rather than using the per-output default setting so that one output
  # plugin on your logstash indexer can serve all events (which of course had
  # fields set on your logstash shippers.)
  config :field_metricname, :validate => :string, :default => "CW_metricname"

  VALID_UNITS = ["Seconds", "Microseconds", "Milliseconds", "Bytes",
                 "Kilobytes", "Megabytes", "Gigabytes", "Terabytes",
                 "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits",
                 "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second",
                 "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second",
                 "Bits/Second", "Kilobits/Second", "Megabits/Second",
                 "Gigabits/Second", "Terabits/Second", "Count/Second", NONE]

  # The default unit to use for events which do not have a `CW_unit` field
  # If you set this option you should probably set the "value" option along with it
  config :unit, :validate => VALID_UNITS, :default => COUNT_UNIT

  # The name of the field used to set the unit on an event metric
  config :field_unit, :validate => :string, :default => "CW_unit"

  # The default value to use for events which do not have a `CW_value` field
  # If provided, this must be a string which can be converted to a float, for example...
  # "1", "2.34", ".5", and "0.67"
  # If you set this option you should probably set the `unit` option along with it
  config :value, :validate => :string, :default => "1"

  # The name of the field used to set the value (float) on an event metric
  config :field_value, :validate => :string, :default => "CW_value"

  # The default dimensions [ name, value, ... ] to use for events which do not have a `CW_dimensions` field
  config :dimensions, :validate => :hash

  # The name of the field used to set the dimensions on an event metric
  # The field named here, if present in an event, must have an array of
  # one or more key & value pairs, for example...
  # `add_field => [ "CW_dimensions", "Environment", "CW_dimensions", "prod" ]`
  # or, equivalently...
  # `add_field => [ "CW_dimensions", "Environment" ]`
  # `add_field => [ "CW_dimensions", "prod" ]`
  config :field_dimensions, :validate => :string, :default => "CW_dimensions"

  attr_reader :event_queue

  public
  def register
    require "thread"
    require "aws-sdk-cloudwatch"

    @cw = Aws::CloudWatch::Client.new(aws_options_hash)

    @event_queue = SizedQueue.new(@queue_size)
    @scheduler = Rufus::Scheduler.new
    @job = @scheduler.schedule_every @timeframe do
      @logger.debug("Scheduler Activated")
      publish(aggregate({}))
    end
  end # def register

  # Rufus::Scheduler >= 3.4 moved the Time impl into a gem: `EoTime = ::EtOrbi::EoTime`
  # Rufus::Scheduler 3.1 - 3.3 used its own Time impl, `Rufus::Scheduler::ZoTime`
  RufusTimeImpl = defined?(Rufus::Scheduler::EoTime) ? Rufus::Scheduler::EoTime :
                      (defined?(Rufus::Scheduler::ZoTime) ? Rufus::Scheduler::ZoTime : ::Time)

  public
  def receive(event)
    return unless (event.get(@field_metricname) || @metricname)

    if (@event_queue.length >= @event_queue.max)
      @job.trigger RufusTimeImpl.now
      @logger.warn("Posted to AWS CloudWatch ahead of schedule. If you see this often, consider increasing the cloudwatch queue_size option.")
    end

    @logger.debug("Queueing event", :event => event)
    @event_queue << event
  end # def receive

  private
  def publish(aggregates)
    aggregates.each do |namespace, data|
      @logger.debug("Namespace, data: ", :namespace => namespace, :data => data)
      metric_data = []
      data.each do |aggregate_key, stats|
        new_data = {
          :metric_name => aggregate_key[METRIC],
          :timestamp => aggregate_key[TIMESTAMP],
          :unit => aggregate_key[UNIT],
          :statistic_values => {
            :sample_count => stats[COUNT],
            :sum => stats[SUM],
            :minimum => stats[MIN],
            :maximum => stats[MAX],
          }
        }
        dims = aggregate_key[DIMENSIONS]
        if (dims.is_a?(Array) && dims.length > 0 && (dims.length % 2) == 0)
          new_data[:dimensions] = Array.new
          i = 0
          while (i < dims.length)
            new_data[:dimensions] << {:name => dims[i], :value => dims[i+1]}
            i += 2
          end
        end
        metric_data << new_data
      end # data.each

      metric_data.each_slice(@batch_size) do |batch|
        begin
          @cw.put_metric_data(
            :namespace => namespace,
            :metric_data => batch
          )
          @logger.debug("Sent data to AWS CloudWatch OK", :namespace => namespace, :metric_data => batch)
        rescue Exception => e
          @logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => batch)
          break
        end
      end
    end # aggregates.each
    return aggregates
  end # def publish

  private
  def aggregate(aggregates)
    @logger.debug("QUEUE SIZE ", :queuesize => @event_queue.size)
    while !@event_queue.empty? do
      begin
        count(aggregates, @event_queue.pop(true))
      rescue Exception => e
        @logger.warn("Exception! Breaking count loop", :exception => e)
        break
      end
    end
    return aggregates
  end # def aggregate

  private
  def count(aggregates, event)
    # If the event doesn't declare a namespace, use the default
    fnamespace = field(event, @field_namespace)
    namespace = (fnamespace ? fnamespace : event.sprintf(@namespace))

    funit = field(event, @field_unit)
    unit = (funit ? funit : event.sprintf(@unit))

    fvalue = field(event, @field_value)
    value = (fvalue ? fvalue : event.sprintf(@value))

    # We may get to this point with valid Units but missing value. Send zeros.
    val = (!value) ? 0.0 : value.to_f

    # Event provides exactly one (but not both) of value or unit
    if ( (fvalue == nil) ^ (funit == nil) )
      @logger.warn("Likely config error: event has one of #{@field_value} or #{@field_unit} fields but not both.", :event => event)
    end

    # If Unit is still not set or is invalid warn about misconfiguration & use NONE
    if (!VALID_UNITS.include?(unit))
      unit = NONE
      @logger.warn("Likely config error: invalid or missing Units (#{unit.to_s}), using '#{NONE}' instead", :event => event)
    end

    if (!aggregates[namespace])
      aggregates[namespace] = {}
    end

    dims = event.get(@field_dimensions)
    if (dims) # event provides dimensions
      # validate the structure
      if (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0)
        @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event)
        dims = nil
      end
      # Best case, we get here and exit the conditional because dims...
      # - is an array
      # - with positive length
      # - and an even number of elements
    elsif (@dimensions.is_a?(Hash)) # event did not provide dimensions, but the output has been configured with a default
      dims = @dimensions.flatten.map{|d| event.sprintf(d)} # into the kind of array described just above
    else
      dims = nil
    end

    fmetric = field(event, @field_metricname)
    aggregate_key = {
      METRIC => (fmetric ? fmetric : event.sprintf(@metricname)),
      DIMENSIONS => dims,
      UNIT => unit,
      TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}")
    }

    if (!aggregates[namespace][aggregate_key])
      aggregates[namespace][aggregate_key] = {}
    end

    if (!aggregates[namespace][aggregate_key][MAX] || val > aggregates[namespace][aggregate_key][MAX])
      aggregates[namespace][aggregate_key][MAX] = val
    end

    if (!aggregates[namespace][aggregate_key][MIN] || val < aggregates[namespace][aggregate_key][MIN])
      aggregates[namespace][aggregate_key][MIN] = val
    end

    if (!aggregates[namespace][aggregate_key][COUNT])
      aggregates[namespace][aggregate_key][COUNT] = 1
    else
      aggregates[namespace][aggregate_key][COUNT] += 1
    end

    if (!aggregates[namespace][aggregate_key][SUM])
      aggregates[namespace][aggregate_key][SUM] = val
    else
      aggregates[namespace][aggregate_key][SUM] += val
    end
  end # def count

  private
  def field(event, fieldname)
    if !event.get(fieldname)
      return nil
    else
      if event.get(fieldname).is_a?(Array)
        return event.get(fieldname).first
      else
        return event.get(fieldname)
      end
    end
  end # def field

end # class LogStash::Outputs::CloudWatch
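The doc comments above describe driving this output entirely from per-event `CW_*` fields; `count` then folds queued events into one statistics record per metric/dimensions/unit/timestamp key, and `publish` ships those records in `batch_size` slices via `put_metric_data`. The following stand-alone Ruby sketch (not part of the gem; the metric name, values, and dimension pairs are made up) shows the shape that folding produces for a single key:

# Illustration only: how a flat CW_dimensions pair list and a few CW_value
# samples collapse into one element of the :metric_data batch that is sent
# to Aws::CloudWatch::Client#put_metric_data.
values = [3.0, 1.5, 7.25]                        # hypothetical CW_value samples
dims   = ["Environment", "prod", "Host", "i-1"]  # hypothetical CW_dimensions pairs

datum = {
  :metric_name => "EventCount",                  # hypothetical CW_metricname
  :unit        => "Count",
  :dimensions  => dims.each_slice(2).map { |name, value| { :name => name, :value => value } },
  :statistic_values => {
    :sample_count => values.length,              # => 3
    :sum          => values.sum,                 # => 11.75
    :minimum      => values.min,                 # => 1.5
    :maximum      => values.max                  # => 7.25
  }
}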
data/lib/logstash/outputs/s3/file_repository.rb
@@ -0,0 +1,193 @@
# encoding: utf-8
require "java"
require "concurrent"
require "concurrent/timer_task"
require "logstash/util"

ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap

module LogStash
  module Outputs
    class S3
      class FileRepository
        DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
        DEFAULT_STALE_TIME_SECS = 15 * 60
        # Ensure that all access or work done
        # on a factory is threadsafe
        class PrefixedValue
          def initialize(file_factory, stale_time)
            @file_factory = file_factory
            @lock = Monitor.new # reentrant Mutex
            @stale_time = stale_time
            @is_deleted = false
          end

          def with_lock
            @lock.synchronize {
              yield @file_factory
            }
          end

          def stale?
            with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
          end

          def apply(prefix)
            return self
          end

          def delete!
            with_lock do |factory|
              factory.current.delete!
              @is_deleted = true
            end
          end

          def deleted?
            with_lock { |_| @is_deleted }
          end
        end

        class FactoryInitializer
          include java.util.function.Function
          def initialize(tags, encoding, temporary_directory, stale_time)
            @tags = tags
            @encoding = encoding
            @temporary_directory = temporary_directory
            @stale_time = stale_time
          end

          def apply(prefix_key)
            PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
          end
        end

        def initialize(tags, encoding, temporary_directory,
                       stale_time = DEFAULT_STALE_TIME_SECS,
                       sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
          # The path needs to contain the prefix so that when we start
          # logstash after a crash we keep the remote structure
          @prefixed_factories = ConcurrentHashMap.new

          @sweeper_interval = sweeper_interval

          @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)

          start_stale_sweeper
        end

        def keys
          @prefixed_factories.keySet
        end

        ##
        # Yields the current file of each non-deleted file factory while the current thread has exclusive access to it.
        # @yieldparam file [TemporaryFile]
        # @return [void]
        def each_files
          each_factory(keys) do |factory|
            yield factory.current
          end
          nil # void return to avoid leaking unsynchronized access
        end

        ##
        # Yields the file factory while the current thread has exclusive access to it, creating a new
        # one if one does not exist or if the current one is being reaped by the stale watcher.
        # @param prefix_key [String]: the prefix key
        # @yieldparam factory [TemporaryFileFactory]: a temporary file factory that this thread has exclusive access to
        # @return [void]
        def get_factory(prefix_key)
          # fast-path: if factory exists and is not deleted, yield it with exclusive access and return
          prefix_val = @prefixed_factories.get(prefix_key)
          prefix_val&.with_lock do |factory|
            # intentional local-jump to ensure deletion detection
            # is done inside the exclusive access.
            unless prefix_val.deleted?
              yield(factory)
              return nil # void return to avoid leaking unsynchronized access
            end
          end

          # slow-path:
          # the ConcurrentHashMap#get operation is lock-free, but may have returned an entry that was being deleted by
          # another thread (such as via stale detection). If we failed to retrieve a value, or retrieved one that had
          # been marked deleted, use the atomic ConcurrentHashMap#compute to retrieve a non-deleted entry.
          prefix_val = @prefixed_factories.compute(prefix_key) do |_, existing|
            existing && !existing.deleted? ? existing : @factory_initializer.apply(prefix_key)
          end
          prefix_val.with_lock { |factory| yield factory }
          nil # void return to avoid leaking unsynchronized access
        end

        ##
        # Yields each non-deleted file factory while the current thread has exclusive access to it.
        # @param prefixes [Array<String>]: the prefix keys
        # @yieldparam factory [TemporaryFileFactory]
        # @return [void]
        def each_factory(prefixes)
          prefixes.each do |prefix_key|
            prefix_val = @prefixed_factories.get(prefix_key)
            prefix_val&.with_lock do |factory|
              yield factory unless prefix_val.deleted?
            end
          end
          nil # void return to avoid leaking unsynchronized access
        end

        ##
        # Ensures that a non-deleted factory exists for the provided prefix and yields its current file
        # while the current thread has exclusive access to it.
        # @param prefix_key [String]
        # @yieldparam file [TemporaryFile]
        # @return [void]
        def get_file(prefix_key)
          get_factory(prefix_key) { |factory| yield factory.current }
          nil # void return to avoid leaking unsynchronized access
        end

        def shutdown
          stop_stale_sweeper
        end

        def size
          @prefixed_factories.size
        end

        def remove_if_stale(prefix_key)
          # we use the atomic `ConcurrentHashMap#computeIfPresent` to detect staleness,
          # mark a stale prefixed factory as deleted, and delete it from the map.
          @prefixed_factories.computeIfPresent(prefix_key) do |_, prefixed_factory|
            # once we have retrieved an instance, we acquire exclusive access to it
            # for stale detection, marking it as deleted before releasing the lock
            # and causing it to become deleted from the map.
            prefixed_factory.with_lock do |_|
              if prefixed_factory.stale?
                prefixed_factory.delete! # mark deleted to prevent reuse
                nil # cause deletion
              else
                prefixed_factory # keep existing
              end
            end
          end
        end

        def start_stale_sweeper
          @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
            LogStash::Util.set_thread_name("S3, Stale factory sweeper")

            @prefixed_factories.keys.each do |prefix|
              remove_if_stale(prefix)
            end
          end

          @stale_sweeper.execute
        end

        def stop_stale_sweeper
          @stale_sweeper.shutdown
        end
      end
    end
  end
end
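FileRepository above hands out prefix-keyed temporary files under a per-prefix lock and sweeps stale ones in the background. A rough usage sketch follows (not from the gem; it assumes a JRuby runtime with the plugin's TemporaryFileFactory and TemporaryFile classes on the load path, and the tag, encoding, directory, and prefix values are illustrative):

require "logstash/outputs/s3/file_repository"

# tags, encoding, temporary_directory; stale_time and sweeper_interval keep their defaults
repository = LogStash::Outputs::S3::FileRepository.new(["tag1"], "none", "/tmp/logstash-s3")

# Exclusive access to the current temporary file for a given prefix key:
repository.get_file("date=2023-01-01/") do |file|
  # `file` is a TemporaryFile; the S3 output writes encoded event payloads to it here.
end

# Visit every live temporary file, e.g. to apply a rotation policy:
repository.each_files { |file| puts file.size }

repository.shutdown # stops the background stale-factory sweeper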
data/lib/logstash/outputs/s3/path_validator.rb
@@ -0,0 +1,18 @@
# encoding: utf-8
module LogStash
  module Outputs
    class S3
      class PathValidator
        INVALID_CHARACTERS = "\^`><"

        def self.valid?(name)
          name.match(matches_re).nil?
        end

        def self.matches_re
          /[#{Regexp.escape(INVALID_CHARACTERS)}]/
        end
      end
    end
  end
end
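A quick look at what `PathValidator.valid?` accepts (the prefix strings are made up; only the four characters in `INVALID_CHARACTERS` cause rejection):

LogStash::Outputs::S3::PathValidator.valid?("logs/2024/01/app") # => true
LogStash::Outputs::S3::PathValidator.valid?("logs/bad>prefix")  # => false, '>' is rejected
LogStash::Outputs::S3::PathValidator.valid?("logs/^caret")      # => false, '^' is rejected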
data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
# encoding: utf-8
require "logstash/outputs/s3/size_rotation_policy"
require "logstash/outputs/s3/time_rotation_policy"

module LogStash
  module Outputs
    class S3
      class SizeAndTimeRotationPolicy
        def initialize(file_size, time_file)
          @size_strategy = SizeRotationPolicy.new(file_size)
          @time_strategy = TimeRotationPolicy.new(time_file)
        end

        def rotate?(file)
          @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
        end

        def needs_periodic?
          true
        end
      end
    end
  end
end
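The combined policy simply ORs the two underlying checks, so a file rotates as soon as either threshold is crossed, and `needs_periodic?` is true because the time-based half must be re-evaluated on a timer rather than only on writes. A small usage sketch (the gem's `TimeRotationPolicy` lives in data/lib/logstash/outputs/s3/time_rotation_policy.rb, not shown in this hunk; the `size_file`/`time_file` arguments below are example values only):

require "logstash/outputs/s3/size_and_time_rotation_policy"

policy = LogStash::Outputs::S3::SizeAndTimeRotationPolicy.new(16 * 1024 * 1024, 15)
policy.needs_periodic? # => true
# policy.rotate?(file) returns true once the TemporaryFile passed in trips
# either the size check or the time check (see the two policy classes for the
# exact conditions).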
data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
# encoding: utf-8
module LogStash
  module Outputs
    class S3
      class SizeRotationPolicy
        attr_reader :size_file

        def initialize(size_file)
          if size_file <= 0
            raise LogStash::ConfigurationError, "`size_file` need to be greater than 0"
          end

          @size_file = size_file
        end

        def rotate?(file)
          file.size >= size_file
        end

        def needs_periodic?
          false
        end
      end
    end
  end
end
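`rotate?` only consults `file.size`, so the policy is easy to exercise in isolation; a tiny sketch with a stand-in for `TemporaryFile` (the 5 MiB threshold is an example value):

FakeFile = Struct.new(:size) # stand-in for TemporaryFile; only #size is read here

policy = LogStash::Outputs::S3::SizeRotationPolicy.new(5 * 1024 * 1024)
policy.rotate?(FakeFile.new(1024))            # => false
policy.rotate?(FakeFile.new(6 * 1024 * 1024)) # => true
policy.needs_periodic?                        # => false, no periodic timer needed for size-only rotation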