logstash-integration-aws 7.1.1-java

Files changed (93)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +33 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/VERSION +1 -0
  10. data/docs/codec-cloudfront.asciidoc +53 -0
  11. data/docs/codec-cloudtrail.asciidoc +45 -0
  12. data/docs/index.asciidoc +36 -0
  13. data/docs/input-cloudwatch.asciidoc +320 -0
  14. data/docs/input-s3.asciidoc +346 -0
  15. data/docs/input-sqs.asciidoc +287 -0
  16. data/docs/output-cloudwatch.asciidoc +321 -0
  17. data/docs/output-s3.asciidoc +442 -0
  18. data/docs/output-sns.asciidoc +166 -0
  19. data/docs/output-sqs.asciidoc +242 -0
  20. data/lib/logstash/codecs/cloudfront.rb +84 -0
  21. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  22. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  23. data/lib/logstash/inputs/s3.rb +466 -0
  24. data/lib/logstash/inputs/sqs.rb +196 -0
  25. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  26. data/lib/logstash/outputs/s3/file_repository.rb +193 -0
  27. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  28. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  29. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  30. data/lib/logstash/outputs/s3/temporary_file.rb +114 -0
  31. data/lib/logstash/outputs/s3/temporary_file_factory.rb +126 -0
  32. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  33. data/lib/logstash/outputs/s3/uploader.rb +76 -0
  34. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  35. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  36. data/lib/logstash/outputs/s3.rb +442 -0
  37. data/lib/logstash/outputs/sns.rb +133 -0
  38. data/lib/logstash/outputs/sqs.rb +167 -0
  39. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  40. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  41. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  42. data/lib/logstash-integration-aws_jars.rb +4 -0
  43. data/lib/tasks/build.rake +15 -0
  44. data/logstash-integration-aws.gemspec +55 -0
  45. data/spec/codecs/cloudfront_spec.rb +92 -0
  46. data/spec/codecs/cloudtrail_spec.rb +56 -0
  47. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  48. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  49. data/spec/fixtures/cloudfront.log +4 -0
  50. data/spec/fixtures/compressed.log.gee.zip +0 -0
  51. data/spec/fixtures/compressed.log.gz +0 -0
  52. data/spec/fixtures/compressed.log.gzip +0 -0
  53. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  54. data/spec/fixtures/json.log +2 -0
  55. data/spec/fixtures/json_with_message.log +2 -0
  56. data/spec/fixtures/multiline.log +6 -0
  57. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  58. data/spec/fixtures/uncompressed.log +2 -0
  59. data/spec/inputs/cloudwatch_spec.rb +85 -0
  60. data/spec/inputs/s3_spec.rb +610 -0
  61. data/spec/inputs/sincedb_spec.rb +17 -0
  62. data/spec/inputs/sqs_spec.rb +324 -0
  63. data/spec/integration/cloudwatch_spec.rb +25 -0
  64. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  65. data/spec/integration/gzip_file_spec.rb +62 -0
  66. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  67. data/spec/integration/outputs/sqs_spec.rb +98 -0
  68. data/spec/integration/restore_from_crash_spec.rb +133 -0
  69. data/spec/integration/s3_spec.rb +66 -0
  70. data/spec/integration/size_rotation_spec.rb +59 -0
  71. data/spec/integration/sqs_spec.rb +110 -0
  72. data/spec/integration/stress_test_spec.rb +60 -0
  73. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  74. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  75. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  76. data/spec/outputs/cloudwatch_spec.rb +38 -0
  77. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  78. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  79. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  80. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  81. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  82. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  83. data/spec/outputs/s3/uploader_spec.rb +69 -0
  84. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  85. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  86. data/spec/outputs/s3_spec.rb +232 -0
  87. data/spec/outputs/sns_spec.rb +160 -0
  88. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  89. data/spec/spec_helper.rb +8 -0
  90. data/spec/support/helpers.rb +121 -0
  91. data/spec/unit/outputs/sqs_spec.rb +247 -0
  92. data/vendor/jar-dependencies/org/logstash/plugins/integration/aws/logstash-integration-aws/7.1.1/logstash-integration-aws-7.1.1.jar +0 -0
  93. metadata +472 -0

data/lib/logstash/outputs/cloudwatch.rb
@@ -0,0 +1,346 @@
+ # encoding: utf-8
+ require "logstash/outputs/base"
+ require "logstash/namespace"
+ require "logstash/plugin_mixins/aws_config"
+
+ require "rufus/scheduler"
+
+ # This output lets you aggregate and send metric data to AWS CloudWatch
+ #
+ # ==== Summary:
+ # This plugin is intended to be used on a logstash indexer agent (but that
+ # is not the only way, see below.) In the intended scenario, one cloudwatch
+ # output plugin is configured, on the logstash indexer node, with just AWS API
+ # credentials, and possibly a region and/or a namespace. The output looks
+ # for fields present in events, and when it finds them, it uses them to
+ # calculate aggregate statistics. If the `metricname` option is set in this
+ # output, then any events which pass through it will be aggregated & sent to
+ # CloudWatch, but that is not recommended. The intended use is to NOT set the
+ # metricname option here, and instead to add a `CW_metricname` field (and other
+ # fields) to only the events you want sent to CloudWatch.
+ #
+ # When events pass through this output they are queued for background
+ # aggregation and sending, which happens every minute by default. The
+ # queue has a maximum size, and when it is full aggregated statistics will be
+ # sent to CloudWatch ahead of schedule. Whenever this happens a warning
+ # message is written to logstash's log. If you see this you should increase
+ # the `queue_size` configuration option to avoid the extra API calls. The queue
+ # is emptied every time we send data to CloudWatch.
+ #
+ # Note: when logstash is stopped the queue is destroyed before it can be processed.
+ # This is a known limitation of logstash and will hopefully be addressed in a
+ # future version.
+ #
+ # ==== Details:
+ # There are two ways to configure this plugin, and they can be used in
+ # combination: event fields & per-output defaults
+ #
+ # Event Field configuration...
+ # You add fields to your events in inputs & filters and this output reads
+ # those fields to aggregate events. The names of the fields read are
+ # configurable via the `field_*` options.
+ #
+ # Per-output defaults...
+ # You set universal defaults in this output plugin's configuration, and
+ # if an event does not have a field for that option then the default is
+ # used.
+ #
+ # Notice, the event fields take precedence over the per-output defaults.
+ #
+ # At a minimum events must have a "metric name" to be sent to CloudWatch.
+ # This can be achieved either by providing a default here OR by adding a
+ # `CW_metricname` field. By default, if no other configuration is provided
+ # besides a metric name, then events will be counted (Unit: Count, Value: 1)
+ # by their metric name (either a default or from their `CW_metricname` field)
+ #
+ # Other fields which can be added to events to modify the behavior of this
+ # plugin are, `CW_namespace`, `CW_unit`, `CW_value`, and
+ # `CW_dimensions`. All of these field names are configurable in
+ # this output. You can also set per-output defaults for any of them.
+ # See below for details.
+ #
+ # Read more about http://aws.amazon.com/cloudwatch/[AWS CloudWatch],
+ # and the specifics of the API endpoint this output uses,
+ # http://docs.amazonwebservices.com/AmazonCloudWatch/latest/APIReference/API_PutMetricData.html[PutMetricData]
+ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base
+   include LogStash::PluginMixins::AwsConfig::V2
+
+   config_name "cloudwatch"
+
+   # Constants
+   # aggregate_key members
+   DIMENSIONS = "dimensions"
+   TIMESTAMP = "timestamp"
+   METRIC = "metric"
+   COUNT = "count"
+   UNIT = "unit"
+   SUM = "sum"
+   MIN = "min"
+   MAX = "max"
+   # Units
+   COUNT_UNIT = "Count"
+   NONE = "None"
+
+   # How often to send data to CloudWatch
+   # This does not affect the event timestamps, events will always have their
+   # actual timestamp (to-the-minute) sent to CloudWatch.
+   #
+   # We only call the API if there is data to send.
+   #
+   # See the Rufus Scheduler docs for an https://github.com/jmettraux/rufus-scheduler#the-time-strings-understood-by-rufus-scheduler[explanation of allowed values]
+   config :timeframe, :validate => :string, :default => "1m"
+
+   # How many events to queue before forcing a call to the CloudWatch API ahead of the `timeframe` schedule
+   # Set this to the number of events-per-timeframe you will be sending to CloudWatch to avoid extra API calls
+   config :queue_size, :validate => :number, :default => 10000
+
+   # How many data points can be given in one call to the CloudWatch API
+   config :batch_size, :validate => :number, :default => 20
+
+   # The default namespace to use for events which do not have a `CW_namespace` field
+   config :namespace, :validate => :string, :default => "Logstash"
+
+   # The name of the field used to set a different namespace per event
+   # Note: Only one namespace can be sent to CloudWatch per API call,
+   # so setting different namespaces will increase the number of API calls,
+   # and those cost money.
+   config :field_namespace, :validate => :string, :default => "CW_namespace"
+
+   # The default metric name to use for events which do not have a `CW_metricname` field.
+   # Beware: If this is provided then all events which pass through this output will be aggregated and
+   # sent to CloudWatch, so use this carefully. Furthermore, when providing this option, you
+   # will probably want to also restrict events from passing through this output using event
+   # type, tag, and field matching
+   config :metricname, :validate => :string
+
+   # The name of the field used to set the metric name on an event
+   # The author of this plugin recommends adding this field to events in inputs &
+   # filters rather than using the per-output default setting so that one output
+   # plugin on your logstash indexer can serve all events (which of course had
+   # fields set on your logstash shippers.)
+   config :field_metricname, :validate => :string, :default => "CW_metricname"
+
+   VALID_UNITS = ["Seconds", "Microseconds", "Milliseconds", "Bytes",
+                  "Kilobytes", "Megabytes", "Gigabytes", "Terabytes",
+                  "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits",
+                  "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second",
+                  "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second",
+                  "Bits/Second", "Kilobits/Second", "Megabits/Second",
+                  "Gigabits/Second", "Terabits/Second", "Count/Second", NONE]
+
+   # The default unit to use for events which do not have a `CW_unit` field
+   # If you set this option you should probably set the "value" option along with it
+   config :unit, :validate => VALID_UNITS, :default => COUNT_UNIT
+
+   # The name of the field used to set the unit on an event metric
+   config :field_unit, :validate => :string, :default => "CW_unit"
+
+   # The default value to use for events which do not have a `CW_value` field
+   # If provided, this must be a string which can be converted to a float, for example...
+   # "1", "2.34", ".5", and "0.67"
+   # If you set this option you should probably set the `unit` option along with it
+   config :value, :validate => :string, :default => "1"
+
+   # The name of the field used to set the value (float) on an event metric
+   config :field_value, :validate => :string, :default => "CW_value"
+
+   # The default dimensions [ name, value, ... ] to use for events which do not have a `CW_dimensions` field
+   config :dimensions, :validate => :hash
+
+   # The name of the field used to set the dimensions on an event metric
+   # The field named here, if present in an event, must have an array of
+   # one or more key & value pairs, for example...
+   # `add_field => [ "CW_dimensions", "Environment", "CW_dimensions", "prod" ]`
+   # or, equivalently...
+   # `add_field => [ "CW_dimensions", "Environment" ]`
+   # `add_field => [ "CW_dimensions", "prod" ]`
+   config :field_dimensions, :validate => :string, :default => "CW_dimensions"
+
+   attr_reader :event_queue
+
+   public
+   def register
+     require "thread"
+     require "aws-sdk-cloudwatch"
+
+     @cw = Aws::CloudWatch::Client.new(aws_options_hash)
+
+     @event_queue = SizedQueue.new(@queue_size)
+     @scheduler = Rufus::Scheduler.new
+     @job = @scheduler.schedule_every @timeframe do
+       @logger.debug("Scheduler Activated")
+       publish(aggregate({}))
+     end
+   end # def register
+
+   # Rufus::Scheduler >= 3.4 moved the Time impl into a gem: `EoTime = ::EtOrbi::EoTime`
+   # Rufus::Scheduler 3.1 - 3.3 use their own Time impl, `Rufus::Scheduler::ZoTime`
+   RufusTimeImpl = defined?(Rufus::Scheduler::EoTime) ? Rufus::Scheduler::EoTime :
+                     (defined?(Rufus::Scheduler::ZoTime) ? Rufus::Scheduler::ZoTime : ::Time)
+
+   public
+   def receive(event)
+     return unless (event.get(@field_metricname) || @metricname)
+
+     if (@event_queue.length >= @event_queue.max)
+       @job.trigger RufusTimeImpl.now
+       @logger.warn("Posted to AWS CloudWatch ahead of schedule. If you see this often, consider increasing the cloudwatch queue_size option.")
+     end
+
+     @logger.debug("Queueing event", :event => event)
+     @event_queue << event
+   end # def receive
+
+   private
+   def publish(aggregates)
+     aggregates.each do |namespace, data|
+       @logger.debug("Namespace, data: ", :namespace => namespace, :data => data)
+       metric_data = []
+       data.each do |aggregate_key, stats|
+         new_data = {
+           :metric_name => aggregate_key[METRIC],
+           :timestamp => aggregate_key[TIMESTAMP],
+           :unit => aggregate_key[UNIT],
+           :statistic_values => {
+             :sample_count => stats[COUNT],
+             :sum => stats[SUM],
+             :minimum => stats[MIN],
+             :maximum => stats[MAX],
+           }
+         }
+         dims = aggregate_key[DIMENSIONS]
+         if (dims.is_a?(Array) && dims.length > 0 && (dims.length % 2) == 0)
+           new_data[:dimensions] = Array.new
+           i = 0
+           while (i < dims.length)
+             new_data[:dimensions] << {:name => dims[i], :value => dims[i+1]}
+             i += 2
+           end
+         end
+         metric_data << new_data
+       end # data.each
+
+       metric_data.each_slice(@batch_size) do |batch|
+         begin
+           @cw.put_metric_data(
+             :namespace => namespace,
+             :metric_data => batch
+           )
+           @logger.debug("Sent data to AWS CloudWatch OK", :namespace => namespace, :metric_data => batch)
+         rescue Exception => e
+           @logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => batch)
+           break
+         end
+       end
+     end # aggregates.each
+     return aggregates
+   end # def publish
+
+   private
+   def aggregate(aggregates)
+     @logger.debug("QUEUE SIZE ", :queuesize => @event_queue.size)
+     while !@event_queue.empty? do
+       begin
+         count(aggregates, @event_queue.pop(true))
+       rescue Exception => e
+         @logger.warn("Exception! Breaking count loop", :exception => e)
+         break
+       end
+     end
+     return aggregates
+   end # def aggregate
+
+   private
+   def count(aggregates, event)
+     # If the event doesn't declare a namespace, use the default
+     fnamespace = field(event, @field_namespace)
+     namespace = (fnamespace ? fnamespace : event.sprintf(@namespace))
+
+     funit = field(event, @field_unit)
+     unit = (funit ? funit : event.sprintf(@unit))
+
+     fvalue = field(event, @field_value)
+     value = (fvalue ? fvalue : event.sprintf(@value))
+
+     # We may get to this point with valid Units but missing value. Send zeros.
+     val = (!value) ? 0.0 : value.to_f
+
+     # Event provides exactly one (but not both) of value or unit
+     if ( (fvalue == nil) ^ (funit == nil) )
+       @logger.warn("Likely config error: event has one of #{@field_value} or #{@field_unit} fields but not both.", :event => event)
+     end
+
+     # If Unit is still not set or is invalid, warn about misconfiguration & use NONE
+     if (!VALID_UNITS.include?(unit))
+       @logger.warn("Likely config error: invalid or missing Units (#{unit.to_s}), using '#{NONE}' instead", :event => event)
+       unit = NONE
+     end
+
+     if (!aggregates[namespace])
+       aggregates[namespace] = {}
+     end
+
+     dims = event.get(@field_dimensions)
+     if (dims) # event provides dimensions
+       # validate the structure
+       if (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0)
+         @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event)
+         dims = nil
+       end
+       # Best case, we get here and exit the conditional because dims...
+       # - is an array
+       # - with positive length
+       # - and an even number of elements
+     elsif (@dimensions.is_a?(Hash)) # event did not provide dimensions, but the output has been configured with a default
+       dims = @dimensions.flatten.map{|d| event.sprintf(d)} # into the kind of array described just above
+     else
+       dims = nil
+     end
+
+     fmetric = field(event, @field_metricname)
+     aggregate_key = {
+       METRIC => (fmetric ? fmetric : event.sprintf(@metricname)),
+       DIMENSIONS => dims,
+       UNIT => unit,
+       TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}")
+     }
+
+     if (!aggregates[namespace][aggregate_key])
+       aggregates[namespace][aggregate_key] = {}
+     end
+
+     if (!aggregates[namespace][aggregate_key][MAX] || val > aggregates[namespace][aggregate_key][MAX])
+       aggregates[namespace][aggregate_key][MAX] = val
+     end
+
+     if (!aggregates[namespace][aggregate_key][MIN] || val < aggregates[namespace][aggregate_key][MIN])
+       aggregates[namespace][aggregate_key][MIN] = val
+     end
+
+     if (!aggregates[namespace][aggregate_key][COUNT])
+       aggregates[namespace][aggregate_key][COUNT] = 1
+     else
+       aggregates[namespace][aggregate_key][COUNT] += 1
+     end
+
+     if (!aggregates[namespace][aggregate_key][SUM])
+       aggregates[namespace][aggregate_key][SUM] = val
+     else
+       aggregates[namespace][aggregate_key][SUM] += val
+     end
+   end # def count
+
+   private
+   def field(event, fieldname)
+     if !event.get(fieldname)
+       return nil
+     else
+       if event.get(fieldname).is_a?(Array)
+         return event.get(fieldname).first
+       else
+         return event.get(fieldname)
+       end
+     end
+   end # def field
+
+ end # class LogStash::Outputs::CloudWatch
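
The aggregation above is a fold: count folds each event into a per-namespace, per-aggregate_key running {max, min, count, sum}, and publish turns each bucket into a single statistic_values datum for PutMetricData. A minimal standalone Ruby sketch of that fold, using two hypothetical values (2.0 and 5.0) under one aggregate key, not taken from the gem:

  values = [2.0, 5.0]
  stats = { "count" => 0, "sum" => 0.0, "min" => nil, "max" => nil }
  values.each do |val|
    stats["max"] = val if stats["max"].nil? || val > stats["max"]
    stats["min"] = val if stats["min"].nil? || val < stats["min"]
    stats["count"] += 1
    stats["sum"] += val
  end
  # publish would then send one datum for the whole bucket instead of one API call per event:
  # :statistic_values => { :sample_count => 2, :sum => 7.0, :minimum => 2.0, :maximum => 5.0 }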

data/lib/logstash/outputs/s3/file_repository.rb
@@ -0,0 +1,193 @@
+ # encoding: utf-8
+ require "java"
+ require "concurrent"
+ require "concurrent/timer_task"
+ require "logstash/util"
+
+ ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap
+
+ module LogStash
+   module Outputs
+     class S3
+       class FileRepository
+         DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
+         DEFAULT_STALE_TIME_SECS = 15 * 60
+         # Ensure that all access or work done
+         # on a factory is threadsafe
+         class PrefixedValue
+           def initialize(file_factory, stale_time)
+             @file_factory = file_factory
+             @lock = Monitor.new # reentrant Mutex
+             @stale_time = stale_time
+             @is_deleted = false
+           end
+
+           def with_lock
+             @lock.synchronize {
+               yield @file_factory
+             }
+           end
+
+           def stale?
+             with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
+           end
+
+           def apply(prefix)
+             return self
+           end
+
+           def delete!
+             with_lock do |factory|
+               factory.current.delete!
+               @is_deleted = true
+             end
+           end
+
+           def deleted?
+             with_lock { |_| @is_deleted }
+           end
+         end
+
+         class FactoryInitializer
+           include java.util.function.Function
+           def initialize(tags, encoding, temporary_directory, stale_time)
+             @tags = tags
+             @encoding = encoding
+             @temporary_directory = temporary_directory
+             @stale_time = stale_time
+           end
+
+           def apply(prefix_key)
+             PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
+           end
+         end
+
+         def initialize(tags, encoding, temporary_directory,
+                        stale_time = DEFAULT_STALE_TIME_SECS,
+                        sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
+           # The path needs to contain the prefix so that when we restart
+           # logstash after a crash we keep the remote structure
+           @prefixed_factories = ConcurrentHashMap.new
+
+           @sweeper_interval = sweeper_interval
+
+           @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)
+
+           start_stale_sweeper
+         end
+
+         def keys
+           @prefixed_factories.keySet
+         end
+
+         ##
+         # Yields the current file of each non-deleted file factory while the current thread has exclusive access to it.
+         # @yieldparam file [TemporaryFile]
+         # @return [void]
+         def each_files
+           each_factory(keys) do |factory|
+             yield factory.current
+           end
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Yields the file factory while the current thread has exclusive access to it, creating a new
+         # one if one does not exist or if the current one is being reaped by the stale watcher.
+         # @param prefix_key [String]: the prefix key
+         # @yieldparam factory [TemporaryFileFactory]: a temporary file factory that this thread has exclusive access to
+         # @return [void]
+         def get_factory(prefix_key)
+           # fast-path: if factory exists and is not deleted, yield it with exclusive access and return
+           prefix_val = @prefixed_factories.get(prefix_key)
+           prefix_val&.with_lock do |factory|
+             # intentional local-jump to ensure deletion detection
+             # is done inside the exclusive access.
+             unless prefix_val.deleted?
+               yield(factory)
+               return nil # void return to avoid leaking unsynchronized access
+             end
+           end
+
+           # slow-path:
+           # the ConcurrentHashMap#get operation is lock-free, but may have returned an entry that was being deleted by
+           # another thread (such as via stale detection). If we failed to retrieve a value, or retrieved one that had
+           # been marked deleted, use the atomic ConcurrentHashMap#compute to retrieve a non-deleted entry.
+           prefix_val = @prefixed_factories.compute(prefix_key) do |_, existing|
+             existing && !existing.deleted? ? existing : @factory_initializer.apply(prefix_key)
+           end
+           prefix_val.with_lock { |factory| yield factory }
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Yields each non-deleted file factory while the current thread has exclusive access to it.
+         # @param prefixes [Array<String>]: the prefix keys
+         # @yieldparam factory [TemporaryFileFactory]
+         # @return [void]
+         def each_factory(prefixes)
+           prefixes.each do |prefix_key|
+             prefix_val = @prefixed_factories.get(prefix_key)
+             prefix_val&.with_lock do |factory|
+               yield factory unless prefix_val.deleted?
+             end
+           end
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Ensures that a non-deleted factory exists for the provided prefix and yields its current file
+         # while the current thread has exclusive access to it.
+         # @param prefix_key [String]
+         # @yieldparam file [TemporaryFile]
+         # @return [void]
+         def get_file(prefix_key)
+           get_factory(prefix_key) { |factory| yield factory.current }
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         def shutdown
+           stop_stale_sweeper
+         end
+
+         def size
+           @prefixed_factories.size
+         end
+
+         def remove_if_stale(prefix_key)
+           # we use the ATOMIC `ConcurrentHashMap#computeIfPresent` to atomically
+           # detect the staleness, mark a stale prefixed factory as deleted, and delete it from the map.
+           @prefixed_factories.computeIfPresent(prefix_key) do |_, prefixed_factory|
+             # once we have retrieved an instance, we acquire exclusive access to it
+             # for stale detection, marking it as deleted before releasing the lock
+             # and causing it to become deleted from the map.
+             prefixed_factory.with_lock do |_|
+               if prefixed_factory.stale?
+                 prefixed_factory.delete! # mark deleted to prevent reuse
+                 nil # cause deletion
+               else
+                 prefixed_factory # keep existing
+               end
+             end
+           end
+         end
+
+         def start_stale_sweeper
+           @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
+             LogStash::Util.set_thread_name("S3, Stale factory sweeper")
+
+             @prefixed_factories.keys.each do |prefix|
+               remove_if_stale(prefix)
+             end
+           end
+
+           @stale_sweeper.execute
+         end
+
+         def stop_stale_sweeper
+           @stale_sweeper.shutdown
+         end
+       end
+     end
+   end
+ end
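
For orientation, a minimal usage sketch of the repository above. The directory and prefix are made up, and it assumes TemporaryFile delegates write to its underlying IO (TemporaryFileFactory and TemporaryFile are defined in sibling files of this diff, not shown here):

  require "tmpdir"

  repository = LogStash::Outputs::S3::FileRepository.new([], "none", Dir.mktmpdir)
  # Lock-protected, exclusive access to the current temporary file for a prefix;
  # the factory is created on first use and reaped by the stale sweeper once idle.
  repository.get_file("logs/2024/") { |file| file.write("hello\n") } # write assumed delegated
  repository.shutdown # stops the stale-factory sweeper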

data/lib/logstash/outputs/s3/path_validator.rb
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class PathValidator
+         INVALID_CHARACTERS = "\\^`><"
+
+         def self.valid?(name)
+           name.match(matches_re).nil?
+         end
+
+         def self.matches_re
+           /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+         end
+       end
+     end
+   end
+ end
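
In other words, a prefix is valid exactly when it contains none of the characters in INVALID_CHARACTERS. Illustrative calls (assuming the class is loaded):

  LogStash::Outputs::S3::PathValidator.valid?("logs/2024/01/") # => true
  LogStash::Outputs::S3::PathValidator.valid?("logs>2024")     # => false, '>' is rejected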

data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
+ # encoding: utf-8
+ require "logstash/outputs/s3/size_rotation_policy"
+ require "logstash/outputs/s3/time_rotation_policy"
+
+ module LogStash
+   module Outputs
+     class S3
+       class SizeAndTimeRotationPolicy
+         def initialize(file_size, time_file)
+           @size_strategy = SizeRotationPolicy.new(file_size)
+           @time_strategy = TimeRotationPolicy.new(time_file)
+         end
+
+         def rotate?(file)
+           @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
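
The composite policy ORs its two strategies, so a file rotates as soon as either threshold is crossed, and needs_periodic? is true because the time strategy can fire without any new writes arriving. A hedged sketch (the Struct is a hypothetical stand-in for TemporaryFile; file_size is in bytes, and time_file is assumed to be the S3 output's minutes-based setting, with TimeRotationPolicy defined in a sibling file of this diff):

  StubFile = Struct.new(:size, :ctime) # hypothetical stand-in, not part of the gem
  policy = LogStash::Outputs::S3::SizeAndTimeRotationPolicy.new(1024 * 1024, 15)
  policy.rotate?(StubFile.new(2 * 1024 * 1024, Time.now)) # => true, the size strategy short-circuits
  policy.needs_periodic? # => true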

data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class SizeRotationPolicy
+         attr_reader :size_file
+
+         def initialize(size_file)
+           if size_file <= 0
+             raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+           end
+
+           @size_file = size_file
+         end
+
+         def rotate?(file)
+           file.size >= size_file
+         end
+
+         def needs_periodic?
+           false
+         end
+       end
+     end
+   end
+ end
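
Since rotate? only compares file.size against the threshold, the policy is easy to exercise in isolation (the Struct below is a hypothetical stand-in for TemporaryFile):

  policy = LogStash::Outputs::S3::SizeRotationPolicy.new(1024) # threshold in bytes
  StubFile = Struct.new(:size) # hypothetical stand-in, not part of the gem
  policy.rotate?(StubFile.new(512))  # => false, under the threshold
  policy.rotate?(StubFile.new(1024)) # => true, size >= size_file rotates
  LogStash::Outputs::S3::SizeRotationPolicy.new(0) # would raise LogStash::ConfigurationError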