logstash-integration-aws 0.1.0.pre
This diff represents the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects changes between package versions as they appear in those registries.
- checksums.yaml +7 -0
- data/CHANGELOG.PRE.MERGE.md +658 -0
- data/CHANGELOG.md +15 -0
- data/CONTRIBUTORS +40 -0
- data/Gemfile +11 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +5 -0
- data/README.md +205 -0
- data/docs/codec-cloudfront.asciidoc +53 -0
- data/docs/codec-cloudtrail.asciidoc +45 -0
- data/docs/index.asciidoc +38 -0
- data/docs/input-cloudwatch.asciidoc +320 -0
- data/docs/input-s3.asciidoc +346 -0
- data/docs/input-sqs.asciidoc +287 -0
- data/docs/output-cloudwatch.asciidoc +321 -0
- data/docs/output-s3.asciidoc +442 -0
- data/docs/output-sns.asciidoc +166 -0
- data/docs/output-sqs.asciidoc +242 -0
- data/lib/logstash/codecs/cloudfront.rb +84 -0
- data/lib/logstash/codecs/cloudtrail.rb +47 -0
- data/lib/logstash/inputs/cloudwatch.rb +338 -0
- data/lib/logstash/inputs/s3.rb +466 -0
- data/lib/logstash/inputs/sqs.rb +196 -0
- data/lib/logstash/outputs/cloudwatch.rb +346 -0
- data/lib/logstash/outputs/s3/file_repository.rb +121 -0
- data/lib/logstash/outputs/s3/path_validator.rb +18 -0
- data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
- data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
- data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/uploader.rb +74 -0
- data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
- data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
- data/lib/logstash/outputs/s3.rb +405 -0
- data/lib/logstash/outputs/sns.rb +133 -0
- data/lib/logstash/outputs/sqs.rb +167 -0
- data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
- data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
- data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
- data/logstash-integration-aws.gemspec +52 -0
- data/spec/codecs/cloudfront_spec.rb +92 -0
- data/spec/codecs/cloudtrail_spec.rb +56 -0
- data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
- data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
- data/spec/fixtures/cloudfront.log +4 -0
- data/spec/fixtures/compressed.log.gee.zip +0 -0
- data/spec/fixtures/compressed.log.gz +0 -0
- data/spec/fixtures/compressed.log.gzip +0 -0
- data/spec/fixtures/invalid_utf8.gbk.log +2 -0
- data/spec/fixtures/json.log +2 -0
- data/spec/fixtures/json_with_message.log +2 -0
- data/spec/fixtures/multiline.log +6 -0
- data/spec/fixtures/multiple_compressed_streams.gz +0 -0
- data/spec/fixtures/uncompressed.log +2 -0
- data/spec/inputs/cloudwatch_spec.rb +85 -0
- data/spec/inputs/s3_spec.rb +610 -0
- data/spec/inputs/sincedb_spec.rb +17 -0
- data/spec/inputs/sqs_spec.rb +324 -0
- data/spec/integration/cloudwatch_spec.rb +25 -0
- data/spec/integration/dynamic_prefix_spec.rb +92 -0
- data/spec/integration/gzip_file_spec.rb +62 -0
- data/spec/integration/gzip_size_rotation_spec.rb +63 -0
- data/spec/integration/outputs/sqs_spec.rb +98 -0
- data/spec/integration/restore_from_crash_spec.rb +67 -0
- data/spec/integration/s3_spec.rb +66 -0
- data/spec/integration/size_rotation_spec.rb +59 -0
- data/spec/integration/sqs_spec.rb +110 -0
- data/spec/integration/stress_test_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
- data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
- data/spec/outputs/cloudwatch_spec.rb +38 -0
- data/spec/outputs/s3/file_repository_spec.rb +143 -0
- data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
- data/spec/outputs/s3/temporary_file_spec.rb +47 -0
- data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/s3/uploader_spec.rb +69 -0
- data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
- data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
- data/spec/outputs/s3_spec.rb +232 -0
- data/spec/outputs/sns_spec.rb +160 -0
- data/spec/plugin_mixin/aws_config_spec.rb +217 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/support/helpers.rb +119 -0
- data/spec/unit/outputs/sqs_spec.rb +247 -0
- metadata +467 -0
--- /dev/null
+++ data/lib/logstash/outputs/cloudwatch.rb
@@ -0,0 +1,346 @@
+# encoding: utf-8
+require "logstash/outputs/base"
+require "logstash/namespace"
+require "logstash/plugin_mixins/aws_config"
+
+require "rufus/scheduler"
+
+# This output lets you aggregate and send metric data to AWS CloudWatch
+#
+# ==== Summary:
+# This plugin is intended to be used on a logstash indexer agent (but that
+# is not the only way, see below.) In the intended scenario, one cloudwatch
+# output plugin is configured, on the logstash indexer node, with just AWS API
+# credentials, and possibly a region and/or a namespace. The output looks
+# for fields present in events, and when it finds them, it uses them to
+# calculate aggregate statistics. If the `metricname` option is set in this
+# output, then any events which pass through it will be aggregated & sent to
+# CloudWatch, but that is not recommended. The intended use is to NOT set the
+# metricname option here, and instead to add a `CW_metricname` field (and other
+# fields) to only the events you want sent to CloudWatch.
+#
+# When events pass through this output they are queued for background
+# aggregation and sending, which happens every minute by default. The
+# queue has a maximum size, and when it is full aggregated statistics will be
+# sent to CloudWatch ahead of schedule. Whenever this happens a warning
+# message is written to logstash's log. If you see this you should increase
+# the `queue_size` configuration option to avoid the extra API calls. The queue
+# is emptied every time we send data to CloudWatch.
+#
+# Note: when logstash is stopped the queue is destroyed before it can be processed.
+# This is a known limitation of logstash and will hopefully be addressed in a
+# future version.
+#
+# ==== Details:
+# There are two ways to configure this plugin, and they can be used in
+# combination: event fields & per-output defaults
+#
+# Event Field configuration...
+# You add fields to your events in inputs & filters and this output reads
+# those fields to aggregate events. The names of the fields read are
+# configurable via the `field_*` options.
+#
+# Per-output defaults...
+# You set universal defaults in this output plugin's configuration, and
+# if an event does not have a field for that option then the default is
+# used.
+#
+# Notice, the event fields take precedence over the per-output defaults.
+#
+# At a minimum events must have a "metric name" to be sent to CloudWatch.
+# This can be achieved either by providing a default here OR by adding a
+# `CW_metricname` field. By default, if no other configuration is provided
+# besides a metric name, then events will be counted (Unit: Count, Value: 1)
+# by their metric name (either a default or from their `CW_metricname` field)
+#
+# Other fields which can be added to events to modify the behavior of this
+# plugin are, `CW_namespace`, `CW_unit`, `CW_value`, and
+# `CW_dimensions`. All of these field names are configurable in
+# this output. You can also set per-output defaults for any of them.
+# See below for details.
+#
+# Read more about http://aws.amazon.com/cloudwatch/[AWS CloudWatch],
+# and the specifics of the API endpoint this output uses,
+# http://docs.amazonwebservices.com/AmazonCloudWatch/latest/APIReference/API_PutMetricData.html[PutMetricData]
+class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base
+  include LogStash::PluginMixins::AwsConfig::V2
+
+  config_name "cloudwatch"
+
+  # Constants
+  # aggregate_key members
+  DIMENSIONS = "dimensions"
+  TIMESTAMP = "timestamp"
+  METRIC = "metric"
+  COUNT = "count"
+  UNIT = "unit"
+  SUM = "sum"
+  MIN = "min"
+  MAX = "max"
+  # Units
+  COUNT_UNIT = "Count"
+  NONE = "None"
+
+  # How often to send data to CloudWatch
+  # This does not affect the event timestamps, events will always have their
+  # actual timestamp (to-the-minute) sent to CloudWatch.
+  #
+  # We only call the API if there is data to send.
+  #
+  # See the Rufus Scheduler docs for an https://github.com/jmettraux/rufus-scheduler#the-time-strings-understood-by-rufus-scheduler[explanation of allowed values]
+  config :timeframe, :validate => :string, :default => "1m"
+
+  # How many events to queue before forcing a call to the CloudWatch API ahead of `timeframe` schedule
+  # Set this to the number of events-per-timeframe you will be sending to CloudWatch to avoid extra API calls
+  config :queue_size, :validate => :number, :default => 10000
+
+  # How many data points can be given in one call to the CloudWatch API
+  config :batch_size, :validate => :number, :default => 20
+
+  # The default namespace to use for events which do not have a `CW_namespace` field
+  config :namespace, :validate => :string, :default => "Logstash"
+
+  # The name of the field used to set a different namespace per event
+  # Note: Only one namespace can be sent to CloudWatch per API call
+  # so setting different namespaces will increase the number of API calls
+  # and those cost money.
+  config :field_namespace, :validate => :string, :default => "CW_namespace"
+
+  # The default metric name to use for events which do not have a `CW_metricname` field.
+  # Beware: If this is provided then all events which pass through this output will be aggregated and
+  # sent to CloudWatch, so use this carefully. Furthermore, when providing this option, you
+  # will probably want to also restrict events from passing through this output using event
+  # type, tag, and field matching
+  config :metricname, :validate => :string
+
+  # The name of the field used to set the metric name on an event
+  # The author of this plugin recommends adding this field to events in inputs &
+  # filters rather than using the per-output default setting so that one output
+  # plugin on your logstash indexer can serve all events (which of course had
+  # fields set on your logstash shippers.)
+  config :field_metricname, :validate => :string, :default => "CW_metricname"
+
+  VALID_UNITS = ["Seconds", "Microseconds", "Milliseconds", "Bytes",
+                 "Kilobytes", "Megabytes", "Gigabytes", "Terabytes",
+                 "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits",
+                 "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second",
+                 "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second",
+                 "Bits/Second", "Kilobits/Second", "Megabits/Second",
+                 "Gigabits/Second", "Terabits/Second", "Count/Second", NONE]
+
+  # The default unit to use for events which do not have a `CW_unit` field
+  # If you set this option you should probably set the "value" option along with it
+  config :unit, :validate => VALID_UNITS, :default => COUNT_UNIT
+
+  # The name of the field used to set the unit on an event metric
+  config :field_unit, :validate => :string, :default => "CW_unit"
+
+  # The default value to use for events which do not have a `CW_value` field
+  # If provided, this must be a string which can be converted to a float, for example...
+  # "1", "2.34", ".5", and "0.67"
+  # If you set this option you should probably set the `unit` option along with it
+  config :value, :validate => :string, :default => "1"
+
+  # The name of the field used to set the value (float) on an event metric
+  config :field_value, :validate => :string, :default => "CW_value"
+
+  # The default dimensions [ name, value, ... ] to use for events which do not have a `CW_dimensions` field
+  config :dimensions, :validate => :hash
+
+  # The name of the field used to set the dimensions on an event metric
+  # The field named here, if present in an event, must have an array of
+  # one or more key & value pairs, for example...
+  # `add_field => [ "CW_dimensions", "Environment", "CW_dimensions", "prod" ]`
+  # or, equivalently...
+  # `add_field => [ "CW_dimensions", "Environment" ]`
+  # `add_field => [ "CW_dimensions", "prod" ]`
+  config :field_dimensions, :validate => :string, :default => "CW_dimensions"
+
+  attr_reader :event_queue
+
+  public
+  def register
+    require "thread"
+    require "aws-sdk-cloudwatch"
+
+    @cw = Aws::CloudWatch::Client.new(aws_options_hash)
+
+    @event_queue = SizedQueue.new(@queue_size)
+    @scheduler = Rufus::Scheduler.new
+    @job = @scheduler.schedule_every @timeframe do
+      @logger.debug("Scheduler Activated")
+      publish(aggregate({}))
+    end
+  end # def register
+
+  # Rufus::Scheduler >= 3.4 moved the Time impl into a gem: `EoTime = ::EtOrbi::EoTime`
+  # Rufus::Scheduler 3.1 - 3.3 used its own Time impl, `Rufus::Scheduler::ZoTime`
+  RufusTimeImpl = defined?(Rufus::Scheduler::EoTime) ? Rufus::Scheduler::EoTime :
+                      (defined?(Rufus::Scheduler::ZoTime) ? Rufus::Scheduler::ZoTime : ::Time)
+
+  public
+  def receive(event)
+    return unless (event.get(@field_metricname) || @metricname)
+
+    if (@event_queue.length >= @event_queue.max)
+      @job.trigger RufusTimeImpl.now
+      @logger.warn("Posted to AWS CloudWatch ahead of schedule. If you see this often, consider increasing the cloudwatch queue_size option.")
+    end
+
+    @logger.debug("Queueing event", :event => event)
+    @event_queue << event
+  end # def receive
+
+  private
+  def publish(aggregates)
+    aggregates.each do |namespace, data|
+      @logger.debug("Namespace, data: ", :namespace => namespace, :data => data)
+      metric_data = []
+      data.each do |aggregate_key, stats|
+        new_data = {
+          :metric_name => aggregate_key[METRIC],
+          :timestamp => aggregate_key[TIMESTAMP],
+          :unit => aggregate_key[UNIT],
+          :statistic_values => {
+            :sample_count => stats[COUNT],
+            :sum => stats[SUM],
+            :minimum => stats[MIN],
+            :maximum => stats[MAX],
+          }
+        }
+        dims = aggregate_key[DIMENSIONS]
+        if (dims.is_a?(Array) && dims.length > 0 && (dims.length % 2) == 0)
+          new_data[:dimensions] = Array.new
+          i = 0
+          while (i < dims.length)
+            new_data[:dimensions] << {:name => dims[i], :value => dims[i+1]}
+            i += 2
+          end
+        end
+        metric_data << new_data
+      end # data.each
+
+      metric_data.each_slice(@batch_size) do |batch|
+        begin
+          @cw.put_metric_data(
+            :namespace => namespace,
+            :metric_data => batch
+          )
+          @logger.debug("Sent data to AWS CloudWatch OK", :namespace => namespace, :metric_data => batch)
+        rescue Exception => e
+          @logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => batch)
+          break
+        end
+      end
+    end # aggregates.each
+    return aggregates
+  end # def publish
+
+  private
+  def aggregate(aggregates)
+    @logger.debug("QUEUE SIZE ", :queuesize => @event_queue.size)
+    while !@event_queue.empty? do
+      begin
+        count(aggregates, @event_queue.pop(true))
+      rescue Exception => e
+        @logger.warn("Exception! Breaking count loop", :exception => e)
+        break
+      end
+    end
+    return aggregates
+  end # def aggregate
+
+  private
+  def count(aggregates, event)
+    # If the event doesn't declare a namespace, use the default
+    fnamespace = field(event, @field_namespace)
+    namespace = (fnamespace ? fnamespace : event.sprintf(@namespace))
+
+    funit = field(event, @field_unit)
+    unit = (funit ? funit : event.sprintf(@unit))
+
+    fvalue = field(event, @field_value)
+    value = (fvalue ? fvalue : event.sprintf(@value))
+
+    # We may get to this point with valid Units but missing value. Send zeros.
+    val = (!value) ? 0.0 : value.to_f
+
+    # Event provides exactly one (but not both) of value or unit
+    if ( (fvalue == nil) ^ (funit == nil) )
+      @logger.warn("Likely config error: event has one of #{@field_value} or #{@field_unit} fields but not both.", :event => event)
+    end
+
+    # If Unit is still not set or is invalid warn about misconfiguration & use NONE
+    if (!VALID_UNITS.include?(unit))
+      unit = NONE
+      @logger.warn("Likely config error: invalid or missing Units (#{unit.to_s}), using '#{NONE}' instead", :event => event)
+    end
+
+    if (!aggregates[namespace])
+      aggregates[namespace] = {}
+    end
+
+    dims = event.get(@field_dimensions)
+    if (dims) # event provides dimensions
+      # validate the structure
+      if (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0)
+        @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event)
+        dims = nil
+      end
+      # Best case, we get here and exit the conditional because dims...
+      # - is an array
+      # - with positive length
+      # - and an even number of elements
+    elsif (@dimensions.is_a?(Hash)) # event did not provide dimensions, but the output has been configured with a default
+      dims = @dimensions.flatten.map{|d| event.sprintf(d)} # into the kind of array described just above
+    else
+      dims = nil
+    end
+
+    fmetric = field(event, @field_metricname)
+    aggregate_key = {
+      METRIC => (fmetric ? fmetric : event.sprintf(@metricname)),
+      DIMENSIONS => dims,
+      UNIT => unit,
+      TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}")
+    }
+
+    if (!aggregates[namespace][aggregate_key])
+      aggregates[namespace][aggregate_key] = {}
+    end
+
+    if (!aggregates[namespace][aggregate_key][MAX] || val > aggregates[namespace][aggregate_key][MAX])
+      aggregates[namespace][aggregate_key][MAX] = val
+    end
+
+    if (!aggregates[namespace][aggregate_key][MIN] || val < aggregates[namespace][aggregate_key][MIN])
+      aggregates[namespace][aggregate_key][MIN] = val
+    end
+
+    if (!aggregates[namespace][aggregate_key][COUNT])
+      aggregates[namespace][aggregate_key][COUNT] = 1
+    else
+      aggregates[namespace][aggregate_key][COUNT] += 1
+    end
+
+    if (!aggregates[namespace][aggregate_key][SUM])
+      aggregates[namespace][aggregate_key][SUM] = val
+    else
+      aggregates[namespace][aggregate_key][SUM] += val
+    end
+  end # def count
+
+  private
+  def field(event, fieldname)
+    if !event.get(fieldname)
+      return nil
+    else
+      if event.get(fieldname).is_a?(Array)
+        return event.get(fieldname).first
+      else
+        return event.get(fieldname)
+      end
+    end
+  end # def field
+
+end # class LogStash::Outputs::CloudWatch
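The heart of the plugin is the fold in `count`: each event collapses into a per-namespace hash keyed by metric name, dimensions, unit, and minute-truncated timestamp, accumulating the count/sum/min/max that `publish` later ships as a CloudWatch StatisticSet. Below is a standalone sketch of that fold, using plain hashes in place of Logstash events; the function name and sample data are illustrative, not part of the plugin's API.

    # Sketch of the count/sum/min/max accumulation performed by `count`.
    def fold_statistics(events, default_namespace: "Logstash")
      aggregates = {}
      events.each do |e|
        ns  = e.fetch("CW_namespace", default_namespace)
        key = {
          metric:    e.fetch("CW_metricname"),
          unit:      e.fetch("CW_unit", "Count"),
          timestamp: e.fetch("timestamp") # already truncated to the minute
        }
        val   = e.fetch("CW_value", "1").to_f
        stats = ((aggregates[ns] ||= {})[key] ||= { count: 0, sum: 0.0, min: nil, max: nil })
        stats[:count] += 1
        stats[:sum]   += val
        stats[:min]    = val if stats[:min].nil? || val < stats[:min]
        stats[:max]    = val if stats[:max].nil? || val > stats[:max]
      end
      aggregates
    end

    events = [
      { "CW_metricname" => "Latency", "CW_unit" => "Milliseconds", "CW_value" => "12.5", "timestamp" => "2024-01-01T00:01:00Z" },
      { "CW_metricname" => "Latency", "CW_unit" => "Milliseconds", "CW_value" => "7.5",  "timestamp" => "2024-01-01T00:01:00Z" }
    ]
    p fold_statistics(events)
    # one StatisticSet: {:count=>2, :sum=>20.0, :min=>7.5, :max=>12.5}

Because only the four statistics are kept per key, memory stays bounded by the number of distinct metric/dimension/unit/minute combinations rather than by event volume.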
--- /dev/null
+++ data/lib/logstash/outputs/s3/file_repository.rb
@@ -0,0 +1,121 @@
+# encoding: utf-8
+require "java"
+require "concurrent"
+require "concurrent/timer_task"
+require "logstash/util"
+
+ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap
+
+module LogStash
+  module Outputs
+    class S3
+      class FileRepository
+        DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
+        DEFAULT_STALE_TIME_SECS = 15 * 60
+        # Ensure that all access or work done
+        # on a factory is threadsafe
+        class PrefixedValue
+          def initialize(file_factory, stale_time)
+            @file_factory = file_factory
+            @lock = Mutex.new
+            @stale_time = stale_time
+          end
+
+          def with_lock
+            @lock.synchronize {
+              yield @file_factory
+            }
+          end
+
+          def stale?
+            with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
+          end
+
+          def apply(prefix)
+            return self
+          end
+
+          def delete!
+            with_lock{ |factory| factory.current.delete! }
+          end
+        end
+
+        class FactoryInitializer
+          include java.util.function.Function
+          def initialize(tags, encoding, temporary_directory, stale_time)
+            @tags = tags
+            @encoding = encoding
+            @temporary_directory = temporary_directory
+            @stale_time = stale_time
+          end
+
+          def apply(prefix_key)
+            PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
+          end
+        end
+
+        def initialize(tags, encoding, temporary_directory,
+                       stale_time = DEFAULT_STALE_TIME_SECS,
+                       sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
+          # The path needs to contain the prefix so that when we restart
+          # logstash after a crash we keep the remote structure
+          @prefixed_factories = ConcurrentHashMap.new
+
+          @sweeper_interval = sweeper_interval
+
+          @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)
+
+          start_stale_sweeper
+        end
+
+        def keys
+          @prefixed_factories.keySet
+        end
+
+        def each_files
+          @prefixed_factories.elements.each do |prefixed_file|
+            prefixed_file.with_lock { |factory| yield factory.current }
+          end
+        end
+
+        # Return the file factory
+        def get_factory(prefix_key)
+          @prefixed_factories.computeIfAbsent(prefix_key, @factory_initializer).with_lock { |factory| yield factory }
+        end
+
+        def get_file(prefix_key)
+          get_factory(prefix_key) { |factory| yield factory.current }
+        end
+
+        def shutdown
+          stop_stale_sweeper
+        end
+
+        def size
+          @prefixed_factories.size
+        end
+
+        def remove_stale(k, v)
+          if v.stale?
+            @prefixed_factories.remove(k, v)
+            v.delete!
+          end
+        end
+
+        def start_stale_sweeper
+          @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
+            LogStash::Util.set_thread_name("S3, Stale factory sweeper")
+
+            @prefixed_factories.forEach{|k,v| remove_stale(k,v)}
+          end
+
+          @stale_sweeper.execute
+        end
+
+        def stop_stale_sweeper
+          @stale_sweeper.shutdown
+        end
+      end
+    end
+  end
+end
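The repository's thread-safety rests on two layers of locking: JRuby's `java.util.concurrent.ConcurrentHashMap#computeIfAbsent` guarantees the factory for a given prefix is created exactly once even under concurrent writers, and each `PrefixedValue` then serializes all work on its factory through its own Mutex. A rough pure-Ruby analog of that shape (a hypothetical class with no Java interop, unlike the real one, which depends on the JVM map):

    require "thread"

    # Pure-Ruby sketch of the compute-if-absent registry pattern used above.
    class PrefixRegistry
      def initialize(&factory)
        @map      = {}
        @map_lock = Mutex.new   # guards insertion into the registry
        @factory  = factory
      end

      # Create-once semantics plus per-value locking, mirroring
      # computeIfAbsent + PrefixedValue#with_lock.
      def with_value(key)
        entry = @map_lock.synchronize do
          @map[key] ||= { lock: Mutex.new, value: @factory.call(key) }
        end
        entry[:lock].synchronize { yield entry[:value] }
      end
    end

    registry = PrefixRegistry.new { |prefix| "factory-for-#{prefix}" }
    registry.with_value("logs/2024/") { |v| puts v } # factory-for-logs/2024/

The real class uses the JVM map instead of a global Mutex so lookups of already-created prefixes do not contend on a single lock.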
--- /dev/null
+++ data/lib/logstash/outputs/s3/path_validator.rb
@@ -0,0 +1,18 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class PathValidator
+        INVALID_CHARACTERS = "\\^`><"
+
+        def self.valid?(name)
+          name.match(matches_re).nil?
+        end
+
+        def self.matches_re
+          /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+        end
+      end
+    end
+  end
+end
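A prefix is valid exactly when it contains none of the characters in INVALID_CHARACTERS; `Regexp.escape` builds the character class safely. Assuming the class above is loaded, a quick check (the paths are made up):

    puts LogStash::Outputs::S3::PathValidator.valid?("logs/2024/app")  # true
    puts LogStash::Outputs::S3::PathValidator.valid?("logs/<broken>")  # false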
--- /dev/null
+++ data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
+# encoding: utf-8
+require "logstash/outputs/s3/size_rotation_policy"
+require "logstash/outputs/s3/time_rotation_policy"
+
+module LogStash
+  module Outputs
+    class S3
+      class SizeAndTimeRotationPolicy
+        def initialize(file_size, time_file)
+          @size_strategy = SizeRotationPolicy.new(file_size)
+          @time_strategy = TimeRotationPolicy.new(time_file)
+        end
+
+        def rotate?(file)
+          @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
--- /dev/null
+++ data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class SizeRotationPolicy
+        attr_reader :size_file
+
+        def initialize(size_file)
+          if size_file <= 0
+            raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+          end
+
+          @size_file = size_file
+        end
+
+        def rotate?(file)
+          file.size >= size_file
+        end
+
+        def needs_periodic?
+          false
+        end
+      end
+    end
+  end
+end
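SizeRotationPolicy only ever consults `file.size`, so it is easy to exercise with a stub standing in for a TemporaryFile; the composite SizeAndTimeRotationPolicy above simply ORs this check with the time-based one. A small usage sketch, assuming the classes above are loaded (the threshold and sizes are made-up values):

    require "ostruct"

    # Stubs exposing only the attribute the size policy consults.
    small = OpenStruct.new(:size => 1024)
    large = OpenStruct.new(:size => 10 * 1024 * 1024)

    policy = LogStash::Outputs::S3::SizeRotationPolicy.new(5 * 1024 * 1024) # bytes
    puts policy.rotate?(small)  # false
    puts policy.rotate?(large)  # true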
--- /dev/null
+++ data/lib/logstash/outputs/s3/temporary_file.rb
@@ -0,0 +1,71 @@
+# encoding: utf-8
+require "thread"
+require "forwardable"
+require "fileutils"
+
+module LogStash
+  module Outputs
+    class S3
+      # Wraps the actual file descriptor in a utility class.
+      # It makes things more OOP and easier to reason about the paths.
+      class TemporaryFile
+        extend Forwardable
+
+        def_delegators :@fd, :path, :write, :close, :fsync
+
+        attr_reader :fd
+
+        def initialize(key, fd, temp_path)
+          @fd = fd
+          @key = key
+          @temp_path = temp_path
+          @created_at = Time.now
+        end
+
+        def ctime
+          @created_at
+        end
+
+        def temp_path
+          @temp_path
+        end
+
+        def size
+          # Use the fd size to get an accurate result,
+          # so we don't have to deal with fsync;
+          # if the file is closed we fall back to File::size
+          begin
+            @fd.size
+          rescue IOError
+            ::File.size(path)
+          end
+        end
+
+        def key
+          @key.gsub(/^\//, "")
+        end
+
+        # Each temporary file is created inside a directory named with a UUID;
+        # instead of deleting the file directly, with the risk of deleting other files,
+        # we delete the root of the UUID. Using a UUID also removes the risk of
+        # deleting unwanted files; it acts as a sandbox.
+        def delete!
+          @fd.close rescue IOError # force close anyway
+          FileUtils.rm_r(@temp_path, :secure => true)
+        end
+
+        def empty?
+          size == 0
+        end
+
+        def self.create_from_existing_file(file_path, temporary_folder)
+          key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+          TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                            ::File.open(file_path, "r"),
+                            ::File.join(temporary_folder, key_parts.slice(0, 1)))
+        end
+      end
+    end
+  end
+end
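`create_from_existing_file` recovers the S3 key from the on-disk layout `<temporary_folder>/<uuid>/<key...>`: the first path component under the temp folder is the UUID sandbox directory, and everything below it is the key. The path arithmetic in isolation (the path and UUID here are made up):

    require "pathname"

    temporary_folder = "/tmp/logstash"
    file_path        = "/tmp/logstash/0f84a5b2/prefix/ls.s3.log"

    key_parts = Pathname.new(file_path).relative_path_from(Pathname.new(temporary_folder)).to_s.split(::File::SEPARATOR)

    puts key_parts.slice(1, key_parts.size).join("/")          # prefix/ls.s3.log (the S3 key)
    puts ::File.join(temporary_folder, key_parts.slice(0, 1))  # /tmp/logstash/0f84a5b2 (the UUID sandbox)

This is what lets the restore-from-crash path re-upload leftover files to the same remote prefix they were originally destined for.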