logstash-integration-aws 7.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +33 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/VERSION +1 -0
  10. data/docs/codec-cloudfront.asciidoc +53 -0
  11. data/docs/codec-cloudtrail.asciidoc +45 -0
  12. data/docs/index.asciidoc +36 -0
  13. data/docs/input-cloudwatch.asciidoc +320 -0
  14. data/docs/input-s3.asciidoc +346 -0
  15. data/docs/input-sqs.asciidoc +287 -0
  16. data/docs/output-cloudwatch.asciidoc +321 -0
  17. data/docs/output-s3.asciidoc +442 -0
  18. data/docs/output-sns.asciidoc +166 -0
  19. data/docs/output-sqs.asciidoc +242 -0
  20. data/lib/logstash/codecs/cloudfront.rb +84 -0
  21. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  22. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  23. data/lib/logstash/inputs/s3.rb +466 -0
  24. data/lib/logstash/inputs/sqs.rb +196 -0
  25. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  26. data/lib/logstash/outputs/s3/file_repository.rb +193 -0
  27. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  28. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  29. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  30. data/lib/logstash/outputs/s3/temporary_file.rb +114 -0
  31. data/lib/logstash/outputs/s3/temporary_file_factory.rb +126 -0
  32. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  33. data/lib/logstash/outputs/s3/uploader.rb +76 -0
  34. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  35. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  36. data/lib/logstash/outputs/s3.rb +442 -0
  37. data/lib/logstash/outputs/sns.rb +133 -0
  38. data/lib/logstash/outputs/sqs.rb +167 -0
  39. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  40. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  41. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  42. data/lib/logstash-integration-aws_jars.rb +4 -0
  43. data/lib/tasks/build.rake +15 -0
  44. data/logstash-integration-aws.gemspec +55 -0
  45. data/spec/codecs/cloudfront_spec.rb +92 -0
  46. data/spec/codecs/cloudtrail_spec.rb +56 -0
  47. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  48. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  49. data/spec/fixtures/cloudfront.log +4 -0
  50. data/spec/fixtures/compressed.log.gee.zip +0 -0
  51. data/spec/fixtures/compressed.log.gz +0 -0
  52. data/spec/fixtures/compressed.log.gzip +0 -0
  53. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  54. data/spec/fixtures/json.log +2 -0
  55. data/spec/fixtures/json_with_message.log +2 -0
  56. data/spec/fixtures/multiline.log +6 -0
  57. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  58. data/spec/fixtures/uncompressed.log +2 -0
  59. data/spec/inputs/cloudwatch_spec.rb +85 -0
  60. data/spec/inputs/s3_spec.rb +610 -0
  61. data/spec/inputs/sincedb_spec.rb +17 -0
  62. data/spec/inputs/sqs_spec.rb +324 -0
  63. data/spec/integration/cloudwatch_spec.rb +25 -0
  64. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  65. data/spec/integration/gzip_file_spec.rb +62 -0
  66. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  67. data/spec/integration/outputs/sqs_spec.rb +98 -0
  68. data/spec/integration/restore_from_crash_spec.rb +133 -0
  69. data/spec/integration/s3_spec.rb +66 -0
  70. data/spec/integration/size_rotation_spec.rb +59 -0
  71. data/spec/integration/sqs_spec.rb +110 -0
  72. data/spec/integration/stress_test_spec.rb +60 -0
  73. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  74. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  75. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  76. data/spec/outputs/cloudwatch_spec.rb +38 -0
  77. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  78. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  79. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  80. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  81. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  82. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  83. data/spec/outputs/s3/uploader_spec.rb +69 -0
  84. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  85. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  86. data/spec/outputs/s3_spec.rb +232 -0
  87. data/spec/outputs/sns_spec.rb +160 -0
  88. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  89. data/spec/spec_helper.rb +8 -0
  90. data/spec/support/helpers.rb +121 -0
  91. data/spec/unit/outputs/sqs_spec.rb +247 -0
  92. data/vendor/jar-dependencies/org/logstash/plugins/integration/aws/logstash-integration-aws/7.1.1/logstash-integration-aws-7.1.1.jar +0 -0
  93. metadata +472 -0
data/lib/logstash/outputs/cloudwatch.rb
@@ -0,0 +1,346 @@
+ # encoding: utf-8
+ require "logstash/outputs/base"
+ require "logstash/namespace"
+ require "logstash/plugin_mixins/aws_config"
+
+ require "rufus/scheduler"
+
+ # This output lets you aggregate and send metric data to AWS CloudWatch
+ #
+ # ==== Summary:
+ # This plugin is intended to be used on a logstash indexer agent (but that
+ # is not the only way, see below.) In the intended scenario, one cloudwatch
+ # output plugin is configured, on the logstash indexer node, with just AWS API
+ # credentials, and possibly a region and/or a namespace. The output looks
+ # for fields present in events, and when it finds them, it uses them to
+ # calculate aggregate statistics. If the `metricname` option is set in this
+ # output, then any events which pass through it will be aggregated & sent to
+ # CloudWatch, but that is not recommended. The intended use is to NOT set the
+ # metricname option here, and instead to add a `CW_metricname` field (and other
+ # fields) to only the events you want sent to CloudWatch.
+ #
+ # When events pass through this output they are queued for background
+ # aggregation and sending, which happens every minute by default. The
+ # queue has a maximum size, and when it is full aggregated statistics will be
+ # sent to CloudWatch ahead of schedule. Whenever this happens a warning
+ # message is written to logstash's log. If you see this you should increase
+ # the `queue_size` configuration option to avoid the extra API calls. The queue
+ # is emptied every time we send data to CloudWatch.
+ #
+ # Note: when logstash is stopped the queue is destroyed before it can be processed.
+ # This is a known limitation of logstash and will hopefully be addressed in a
+ # future version.
+ #
+ # ==== Details:
+ # There are two ways to configure this plugin, and they can be used in
+ # combination: event fields & per-output defaults
+ #
+ # Event Field configuration...
+ # You add fields to your events in inputs & filters and this output reads
+ # those fields to aggregate events. The names of the fields read are
+ # configurable via the `field_*` options.
+ #
+ # Per-output defaults...
+ # You set universal defaults in this output plugin's configuration, and
+ # if an event does not have a field for that option then the default is
+ # used.
+ #
+ # Note: the event fields take precedence over the per-output defaults.
+ #
+ # At a minimum events must have a "metric name" to be sent to CloudWatch.
+ # This can be achieved either by providing a default here OR by adding a
+ # `CW_metricname` field. By default, if no other configuration is provided
+ # besides a metric name, then events will be counted (Unit: Count, Value: 1)
+ # by their metric name (either a default or from their `CW_metricname` field)
+ #
+ # Other fields which can be added to events to modify the behavior of this
+ # plugin are `CW_namespace`, `CW_unit`, `CW_value`, and
+ # `CW_dimensions`. All of these field names are configurable in
+ # this output. You can also set per-output defaults for any of them.
+ # See below for details.
+ #
+ # Read more about http://aws.amazon.com/cloudwatch/[AWS CloudWatch],
+ # and the specifics of the API endpoint this output uses,
+ # http://docs.amazonwebservices.com/AmazonCloudWatch/latest/APIReference/API_PutMetricData.html[PutMetricData]
+ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base
+   include LogStash::PluginMixins::AwsConfig::V2
+
+   config_name "cloudwatch"
+
+   # Constants
+   # aggregate_key members
+   DIMENSIONS = "dimensions"
+   TIMESTAMP = "timestamp"
+   METRIC = "metric"
+   COUNT = "count"
+   UNIT = "unit"
+   SUM = "sum"
+   MIN = "min"
+   MAX = "max"
+   # Units
+   COUNT_UNIT = "Count"
+   NONE = "None"
+
+   # How often to send data to CloudWatch
+   # This does not affect the event timestamps, events will always have their
+   # actual timestamp (to-the-minute) sent to CloudWatch.
+   #
+   # We only call the API if there is data to send.
+   #
+   # See the Rufus Scheduler docs for an https://github.com/jmettraux/rufus-scheduler#the-time-strings-understood-by-rufus-scheduler[explanation of allowed values]
+   config :timeframe, :validate => :string, :default => "1m"
+
+   # How many events to queue before forcing a call to the CloudWatch API ahead of `timeframe` schedule
+   # Set this to the number of events-per-timeframe you will be sending to CloudWatch to avoid extra API calls
+   config :queue_size, :validate => :number, :default => 10000
+
+   # How many data points can be given in one call to the CloudWatch API
+   config :batch_size, :validate => :number, :default => 20
+
+   # The default namespace to use for events which do not have a `CW_namespace` field
+   config :namespace, :validate => :string, :default => "Logstash"
+
+   # The name of the field used to set a different namespace per event
+   # Note: Only one namespace can be sent to CloudWatch per API call
+   # so setting different namespaces will increase the number of API calls
+   # and those cost money.
+   config :field_namespace, :validate => :string, :default => "CW_namespace"
+
+   # The default metric name to use for events which do not have a `CW_metricname` field.
+   # Beware: If this is provided then all events which pass through this output will be aggregated and
+   # sent to CloudWatch, so use this carefully. Furthermore, when providing this option, you
+   # will probably want to also restrict events from passing through this output using event
+   # type, tag, and field matching
+   config :metricname, :validate => :string
+
+   # The name of the field used to set the metric name on an event
+   # The author of this plugin recommends adding this field to events in inputs &
+   # filters rather than using the per-output default setting so that one output
+   # plugin on your logstash indexer can serve all events (which of course had
+   # fields set on your logstash shippers.)
+   config :field_metricname, :validate => :string, :default => "CW_metricname"
+
+   VALID_UNITS = ["Seconds", "Microseconds", "Milliseconds", "Bytes",
+                  "Kilobytes", "Megabytes", "Gigabytes", "Terabytes",
+                  "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits",
+                  "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second",
+                  "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second",
+                  "Bits/Second", "Kilobits/Second", "Megabits/Second",
+                  "Gigabits/Second", "Terabits/Second", "Count/Second", NONE]
+
+   # The default unit to use for events which do not have a `CW_unit` field
+   # If you set this option you should probably set the "value" option along with it
+   config :unit, :validate => VALID_UNITS, :default => COUNT_UNIT
+
+   # The name of the field used to set the unit on an event metric
+   config :field_unit, :validate => :string, :default => "CW_unit"
+
+   # The default value to use for events which do not have a `CW_value` field
+   # If provided, this must be a string which can be converted to a float, for example...
+   # "1", "2.34", ".5", and "0.67"
+   # If you set this option you should probably set the `unit` option along with it
+   config :value, :validate => :string, :default => "1"
+
+   # The name of the field used to set the value (float) on an event metric
+   config :field_value, :validate => :string, :default => "CW_value"
+
+   # The default dimensions [ name, value, ... ] to use for events which do not have a `CW_dimensions` field
+   config :dimensions, :validate => :hash
+
+   # The name of the field used to set the dimensions on an event metric
+   # The field named here, if present in an event, must have an array of
+   # one or more key & value pairs, for example...
+   # `add_field => [ "CW_dimensions", "Environment", "CW_dimensions", "prod" ]`
+   # or, equivalently...
+   # `add_field => [ "CW_dimensions", "Environment" ]`
+   # `add_field => [ "CW_dimensions", "prod" ]`
+   config :field_dimensions, :validate => :string, :default => "CW_dimensions"
+
+   attr_reader :event_queue
+
+   public
+   def register
+     require "thread"
+     require "aws-sdk-cloudwatch"
+
+     @cw = Aws::CloudWatch::Client.new(aws_options_hash)
+
+     @event_queue = SizedQueue.new(@queue_size)
+     @scheduler = Rufus::Scheduler.new
+     @job = @scheduler.schedule_every @timeframe do
+       @logger.debug("Scheduler Activated")
+       publish(aggregate({}))
+     end
+   end # def register
+
+   # Rufus::Scheduler >= 3.4 moved the Time impl into a gem: `EoTime = ::EtOrbi::EoTime`
+   # Rufus::Scheduler 3.1 - 3.3 used its own Time impl, `Rufus::Scheduler::ZoTime`
+   RufusTimeImpl = defined?(Rufus::Scheduler::EoTime) ? Rufus::Scheduler::EoTime :
+                     (defined?(Rufus::Scheduler::ZoTime) ? Rufus::Scheduler::ZoTime : ::Time)
+
+   public
+   def receive(event)
+     return unless (event.get(@field_metricname) || @metricname)
+
+     if (@event_queue.length >= @event_queue.max)
+       @job.trigger RufusTimeImpl.now
+       @logger.warn("Posted to AWS CloudWatch ahead of schedule. If you see this often, consider increasing the cloudwatch queue_size option.")
+     end
+
+     @logger.debug("Queueing event", :event => event)
+     @event_queue << event
+   end # def receive
+
+   private
+   def publish(aggregates)
+     aggregates.each do |namespace, data|
+       @logger.debug("Namespace, data: ", :namespace => namespace, :data => data)
+       metric_data = []
+       data.each do |aggregate_key, stats|
+         new_data = {
+           :metric_name => aggregate_key[METRIC],
+           :timestamp => aggregate_key[TIMESTAMP],
+           :unit => aggregate_key[UNIT],
+           :statistic_values => {
+             :sample_count => stats[COUNT],
+             :sum => stats[SUM],
+             :minimum => stats[MIN],
+             :maximum => stats[MAX],
+           }
+         }
+         dims = aggregate_key[DIMENSIONS]
+         if (dims.is_a?(Array) && dims.length > 0 && (dims.length % 2) == 0)
+           new_data[:dimensions] = Array.new
+           i = 0
+           while (i < dims.length)
+             new_data[:dimensions] << {:name => dims[i], :value => dims[i+1]}
+             i += 2
+           end
+         end
+         metric_data << new_data
+       end # data.each
+
+       metric_data.each_slice(@batch_size) do |batch|
+         begin
+           @cw.put_metric_data(
+             :namespace => namespace,
+             :metric_data => batch
+           )
+           @logger.debug("Sent data to AWS CloudWatch OK", :namespace => namespace, :metric_data => batch)
+         rescue Exception => e
+           @logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => batch)
+           break
+         end
+       end
+     end # aggregates.each
+     return aggregates
+   end # def publish
+
+   private
+   def aggregate(aggregates)
+     @logger.debug("QUEUE SIZE ", :queuesize => @event_queue.size)
+     while !@event_queue.empty? do
+       begin
+         count(aggregates, @event_queue.pop(true))
+       rescue Exception => e
+         @logger.warn("Exception! Breaking count loop", :exception => e)
+         break
+       end
+     end
+     return aggregates
+   end # def aggregate
+
+   private
+   def count(aggregates, event)
+     # If the event doesn't declare a namespace, use the default
+     fnamespace = field(event, @field_namespace)
+     namespace = (fnamespace ? fnamespace : event.sprintf(@namespace))
+
+     funit = field(event, @field_unit)
+     unit = (funit ? funit : event.sprintf(@unit))
+
+     fvalue = field(event, @field_value)
+     value = (fvalue ? fvalue : event.sprintf(@value))
+
+     # We may get to this point with valid Units but missing value. Send zeros.
+     val = (!value) ? 0.0 : value.to_f
+
+     # Event provides exactly one (but not both) of value or unit
+     if ( (fvalue == nil) ^ (funit == nil) )
+       @logger.warn("Likely config error: event has one of #{@field_value} or #{@field_unit} fields but not both.", :event => event)
+     end
+
+     # If Unit is still not set or is invalid warn about misconfiguration & use NONE
+     if (!VALID_UNITS.include?(unit))
+       unit = NONE
+       @logger.warn("Likely config error: invalid or missing Units (#{unit.to_s}), using '#{NONE}' instead", :event => event)
+     end
+
+     if (!aggregates[namespace])
+       aggregates[namespace] = {}
+     end
+
+     dims = event.get(@field_dimensions)
+     if (dims) # event provides dimensions
+       # validate the structure
+       if (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0)
+         @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event)
+         dims = nil
+       end
+       # Best case, we get here and exit the conditional because dims...
+       # - is an array
+       # - with positive length
+       # - and an even number of elements
+     elsif (@dimensions.is_a?(Hash)) # event did not provide dimensions, but the output has been configured with a default
+       dims = @dimensions.flatten.map{|d| event.sprintf(d)} # into the kind of array described just above
+     else
+       dims = nil
+     end
+
+     fmetric = field(event, @field_metricname)
+     aggregate_key = {
+       METRIC => (fmetric ? fmetric : event.sprintf(@metricname)),
+       DIMENSIONS => dims,
+       UNIT => unit,
+       TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}")
+     }
+
+     if (!aggregates[namespace][aggregate_key])
+       aggregates[namespace][aggregate_key] = {}
+     end
+
+     if (!aggregates[namespace][aggregate_key][MAX] || val > aggregates[namespace][aggregate_key][MAX])
+       aggregates[namespace][aggregate_key][MAX] = val
+     end
+
+     if (!aggregates[namespace][aggregate_key][MIN] || val < aggregates[namespace][aggregate_key][MIN])
+       aggregates[namespace][aggregate_key][MIN] = val
+     end
+
+     if (!aggregates[namespace][aggregate_key][COUNT])
+       aggregates[namespace][aggregate_key][COUNT] = 1
+     else
+       aggregates[namespace][aggregate_key][COUNT] += 1
+     end
+
+     if (!aggregates[namespace][aggregate_key][SUM])
+       aggregates[namespace][aggregate_key][SUM] = val
+     else
+       aggregates[namespace][aggregate_key][SUM] += val
+     end
+   end # def count
+
+   private
+   def field(event, fieldname)
+     if !event.get(fieldname)
+       return nil
+     else
+       if event.get(fieldname).is_a?(Array)
+         return event.get(fieldname).first
+       else
+         return event.get(fieldname)
+       end
+     end
+   end # def field
+
+ end # class LogStash::Outputs::CloudWatch
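
The documentation comments above describe the aggregation only in prose. The short standalone Ruby sketch below (illustrative only, not part of the gem; the two sample events, the "Latency" metric and the "Environment"/"prod" dimension values are invented) shows how events carrying `CW_*` fields collapse into a single `put_metric_data` datum, including how the flat `CW_dimensions` name/value array becomes CloudWatch dimension hashes:

# Hypothetical worked example mirroring the count/publish logic above.
events = [
  { "CW_metricname" => "Latency", "CW_unit" => "Milliseconds", "CW_value" => "12.5",
    "CW_dimensions" => ["Environment", "prod"] },
  { "CW_metricname" => "Latency", "CW_unit" => "Milliseconds", "CW_value" => "7.5",
    "CW_dimensions" => ["Environment", "prod"] },
]

# Aggregate by (metric, unit, dimensions), the same members as the plugin's aggregate_key.
stats = Hash.new { |h, k| h[k] = { count: 0, sum: 0.0, min: nil, max: nil } }
events.each do |e|
  key = [e["CW_metricname"], e["CW_unit"], e["CW_dimensions"]]
  val = e["CW_value"].to_f
  s = stats[key]
  s[:count] += 1
  s[:sum]   += val
  s[:min]    = [s[:min], val].compact.min
  s[:max]    = [s[:max], val].compact.max
end

stats.each do |(metric, unit, dims), s|
  datum = {
    metric_name: metric,
    unit: unit,
    # flat [name, value, name, value, ...] pairs become CloudWatch dimension hashes
    dimensions: dims.each_slice(2).map { |name, value| { name: name, value: value } },
    statistic_values: { sample_count: s[:count], sum: s[:sum], minimum: s[:min], maximum: s[:max] },
  }
  p datum # one datum per aggregate key: sample_count 2, sum 20.0, minimum 7.5, maximum 12.5
end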
data/lib/logstash/outputs/s3/file_repository.rb
@@ -0,0 +1,193 @@
+ # encoding: utf-8
+ require "java"
+ require "concurrent"
+ require "concurrent/timer_task"
+ require "logstash/util"
+
+ ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap
+
+ module LogStash
+   module Outputs
+     class S3
+       class FileRepository
+         DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
+         DEFAULT_STALE_TIME_SECS = 15 * 60
+         # Ensure that all access or work done
+         # on a factory is threadsafe
+         class PrefixedValue
+           def initialize(file_factory, stale_time)
+             @file_factory = file_factory
+             @lock = Monitor.new # reentrant Mutex
+             @stale_time = stale_time
+             @is_deleted = false
+           end
+
+           def with_lock
+             @lock.synchronize {
+               yield @file_factory
+             }
+           end
+
+           def stale?
+             with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
+           end
+
+           def apply(prefix)
+             return self
+           end
+
+           def delete!
+             with_lock do |factory|
+               factory.current.delete!
+               @is_deleted = true
+             end
+           end
+
+           def deleted?
+             with_lock { |_| @is_deleted }
+           end
+         end
+
+         class FactoryInitializer
+           include java.util.function.Function
+           def initialize(tags, encoding, temporary_directory, stale_time)
+             @tags = tags
+             @encoding = encoding
+             @temporary_directory = temporary_directory
+             @stale_time = stale_time
+           end
+
+           def apply(prefix_key)
+             PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
+           end
+         end
+
+         def initialize(tags, encoding, temporary_directory,
+                        stale_time = DEFAULT_STALE_TIME_SECS,
+                        sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
+           # The path needs to contain the prefix so that when we restart
+           # logstash after a crash we keep the remote structure
+           @prefixed_factories = ConcurrentHashMap.new
+
+           @sweeper_interval = sweeper_interval
+
+           @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)
+
+           start_stale_sweeper
+         end
+
+         def keys
+           @prefixed_factories.keySet
+         end
+
+         ##
+         # Yields the current file of each non-deleted file factory while the current thread has exclusive access to it.
+         # @yieldparam file [TemporaryFile]
+         # @return [void]
+         def each_files
+           each_factory(keys) do |factory|
+             yield factory.current
+           end
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Yields the file factory while the current thread has exclusive access to it, creating a new
+         # one if one does not exist or if the current one is being reaped by the stale watcher.
+         # @param prefix_key [String]: the prefix key
+         # @yieldparam factory [TemporaryFileFactory]: a temporary file factory that this thread has exclusive access to
+         # @return [void]
+         def get_factory(prefix_key)
+           # fast-path: if factory exists and is not deleted, yield it with exclusive access and return
+           prefix_val = @prefixed_factories.get(prefix_key)
+           prefix_val&.with_lock do |factory|
+             # intentional local-jump to ensure deletion detection
+             # is done inside the exclusive access.
+             unless prefix_val.deleted?
+               yield(factory)
+               return nil # void return to avoid leaking unsynchronized access
+             end
+           end
+
+           # slow-path:
+           # the ConcurrentHashMap#get operation is lock-free, but may have returned an entry that was being deleted by
+           # another thread (such as via stale detection). If we failed to retrieve a value, or retrieved one that had
+           # been marked deleted, use the atomic ConcurrentHashMap#compute to retrieve a non-deleted entry.
+           prefix_val = @prefixed_factories.compute(prefix_key) do |_, existing|
+             existing && !existing.deleted? ? existing : @factory_initializer.apply(prefix_key)
+           end
+           prefix_val.with_lock { |factory| yield factory }
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Yields each non-deleted file factory while the current thread has exclusive access to it.
+         # @param prefixes [Array<String>]: the prefix keys
+         # @yieldparam factory [TemporaryFileFactory]
+         # @return [void]
+         def each_factory(prefixes)
+           prefixes.each do |prefix_key|
+             prefix_val = @prefixed_factories.get(prefix_key)
+             prefix_val&.with_lock do |factory|
+               yield factory unless prefix_val.deleted?
+             end
+           end
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Ensures that a non-deleted factory exists for the provided prefix and yields its current file
+         # while the current thread has exclusive access to it.
+         # @param prefix_key [String]
+         # @yieldparam file [TemporaryFile]
+         # @return [void]
+         def get_file(prefix_key)
+           get_factory(prefix_key) { |factory| yield factory.current }
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         def shutdown
+           stop_stale_sweeper
+         end
+
+         def size
+           @prefixed_factories.size
+         end
+
+         def remove_if_stale(prefix_key)
+           # we use the atomic `ConcurrentHashMap#computeIfPresent` to
+           # detect the staleness, mark a stale prefixed factory as deleted, and delete it from the map.
+           @prefixed_factories.computeIfPresent(prefix_key) do |_, prefixed_factory|
+             # once we have retrieved an instance, we acquire exclusive access to it
+             # for stale detection, marking it as deleted before releasing the lock
+             # and causing it to become deleted from the map.
+             prefixed_factory.with_lock do |_|
+               if prefixed_factory.stale?
+                 prefixed_factory.delete! # mark deleted to prevent reuse
+                 nil # cause deletion
+               else
+                 prefixed_factory # keep existing
+               end
+             end
+           end
+         end
+
+         def start_stale_sweeper
+           @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
+             LogStash::Util.set_thread_name("S3, Stale factory sweeper")
+
+             @prefixed_factories.keys.each do |prefix|
+               remove_if_stale(prefix)
+             end
+           end
+
+           @stale_sweeper.execute
+         end
+
+         def stop_stale_sweeper
+           @stale_sweeper.shutdown
+         end
+       end
+     end
+   end
+ end
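
A hedged usage sketch of the repository's locking contract follows. It assumes JRuby plus the plugin's own classes (`TemporaryFileFactory`, `TemporaryFile`) on the load path; the empty tag list, "none" encoding, temporary directory and "mylogs/" prefix are invented example values:

require "logstash/outputs/s3/file_repository"
require "logstash/outputs/s3/temporary_file_factory"

repo = LogStash::Outputs::S3::FileRepository.new([], "none", "/tmp/logstash-s3")

# Appends happen inside the per-prefix monitor that get_file/get_factory acquire.
repo.get_file("mylogs/") { |file| file.write("hello world\n") }

# Rotation checks and uploads walk the files the same way, never touching a
# file outside its factory's lock.
repo.each_files { |file| puts "#{file.path} => #{file.size} bytes" }

repo.shutdown # stops the stale-factory sweeper

Every temporary file is reached through `get_file`, `get_factory` or `each_files`, so all reads and writes happen under the per-prefix `Monitor` owned by `PrefixedValue`, while the map itself is only mutated through atomic `ConcurrentHashMap` operations.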
data/lib/logstash/outputs/s3/path_validator.rb
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class PathValidator
+         INVALID_CHARACTERS = "\^`><"
+
+         def self.valid?(name)
+           name.match(matches_re).nil?
+         end
+
+         def self.matches_re
+           /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+         end
+       end
+     end
+   end
+ end
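
For reference, a hypothetical check against the validator above. `INVALID_CHARACTERS` expands to the four characters ^ ` > <, so a prefix is valid as long as it contains none of them (the sample prefixes are invented):

require "logstash/outputs/s3/path_validator"

LogStash::Outputs::S3::PathValidator.valid?("logs/%{+YYYY}/")  # => true
LogStash::Outputs::S3::PathValidator.valid?("logs/`whoami`/")  # => false (contains a backtick)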
data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
+ # encoding: utf-8
+ require "logstash/outputs/s3/size_rotation_policy"
+ require "logstash/outputs/s3/time_rotation_policy"
+
+ module LogStash
+   module Outputs
+     class S3
+       class SizeAndTimeRotationPolicy
+         def initialize(file_size, time_file)
+           @size_strategy = SizeRotationPolicy.new(file_size)
+           @time_strategy = TimeRotationPolicy.new(time_file)
+         end
+
+         def rotate?(file)
+           @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class SizeRotationPolicy
+         attr_reader :size_file
+
+         def initialize(size_file)
+           if size_file <= 0
+             raise LogStash::ConfigurationError, "`size_file` need to be greater than 0"
+           end
+
+           @size_file = size_file
+         end
+
+         def rotate?(file)
+           file.size >= size_file
+         end
+
+         def needs_periodic?
+           false
+         end
+       end
+     end
+   end
+ end
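
A small, hypothetical illustration of the size-based policy above: the `Struct` stands in for a `TemporaryFile`, which only needs to respond to `#size` here, and the 1 MiB threshold is an arbitrary example value:

require "logstash/outputs/s3/size_rotation_policy"

FakeFile = Struct.new(:size)
policy = LogStash::Outputs::S3::SizeRotationPolicy.new(1024 * 1024) # rotate once the file reaches this many bytes

policy.rotate?(FakeFile.new(10))              # => false
policy.rotate?(FakeFile.new(5 * 1024 * 1024)) # => true
policy.needs_periodic?                        # => false; a size-only policy needs no periodic rotation check

`SizeAndTimeRotationPolicy` composes this with `TimeRotationPolicy` (not shown in this diff), so its `rotate?` fires when either threshold is crossed and `needs_periodic?` stays true.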