logstash-integration-aws 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/docs/codec-cloudfront.asciidoc +53 -0
  10. data/docs/codec-cloudtrail.asciidoc +45 -0
  11. data/docs/index.asciidoc +38 -0
  12. data/docs/input-cloudwatch.asciidoc +320 -0
  13. data/docs/input-s3.asciidoc +346 -0
  14. data/docs/input-sqs.asciidoc +287 -0
  15. data/docs/output-cloudwatch.asciidoc +321 -0
  16. data/docs/output-s3.asciidoc +442 -0
  17. data/docs/output-sns.asciidoc +166 -0
  18. data/docs/output-sqs.asciidoc +242 -0
  19. data/lib/logstash/codecs/cloudfront.rb +84 -0
  20. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  21. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  22. data/lib/logstash/inputs/s3.rb +466 -0
  23. data/lib/logstash/inputs/sqs.rb +196 -0
  24. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  25. data/lib/logstash/outputs/s3/file_repository.rb +121 -0
  26. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  27. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  28. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  29. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  30. data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
  31. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  32. data/lib/logstash/outputs/s3/uploader.rb +74 -0
  33. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  34. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  35. data/lib/logstash/outputs/s3.rb +405 -0
  36. data/lib/logstash/outputs/sns.rb +133 -0
  37. data/lib/logstash/outputs/sqs.rb +167 -0
  38. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  39. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  40. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  41. data/logstash-integration-aws.gemspec +52 -0
  42. data/spec/codecs/cloudfront_spec.rb +92 -0
  43. data/spec/codecs/cloudtrail_spec.rb +56 -0
  44. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  45. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  46. data/spec/fixtures/cloudfront.log +4 -0
  47. data/spec/fixtures/compressed.log.gee.zip +0 -0
  48. data/spec/fixtures/compressed.log.gz +0 -0
  49. data/spec/fixtures/compressed.log.gzip +0 -0
  50. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  51. data/spec/fixtures/json.log +2 -0
  52. data/spec/fixtures/json_with_message.log +2 -0
  53. data/spec/fixtures/multiline.log +6 -0
  54. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  55. data/spec/fixtures/uncompressed.log +2 -0
  56. data/spec/inputs/cloudwatch_spec.rb +85 -0
  57. data/spec/inputs/s3_spec.rb +610 -0
  58. data/spec/inputs/sincedb_spec.rb +17 -0
  59. data/spec/inputs/sqs_spec.rb +324 -0
  60. data/spec/integration/cloudwatch_spec.rb +25 -0
  61. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  62. data/spec/integration/gzip_file_spec.rb +62 -0
  63. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  64. data/spec/integration/outputs/sqs_spec.rb +98 -0
  65. data/spec/integration/restore_from_crash_spec.rb +67 -0
  66. data/spec/integration/s3_spec.rb +66 -0
  67. data/spec/integration/size_rotation_spec.rb +59 -0
  68. data/spec/integration/sqs_spec.rb +110 -0
  69. data/spec/integration/stress_test_spec.rb +60 -0
  70. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  71. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  72. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  73. data/spec/outputs/cloudwatch_spec.rb +38 -0
  74. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  75. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  76. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  77. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  78. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  79. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  80. data/spec/outputs/s3/uploader_spec.rb +69 -0
  81. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  82. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  83. data/spec/outputs/s3_spec.rb +232 -0
  84. data/spec/outputs/sns_spec.rb +160 -0
  85. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  86. data/spec/spec_helper.rb +8 -0
  87. data/spec/support/helpers.rb +119 -0
  88. data/spec/unit/outputs/sqs_spec.rb +247 -0
  89. metadata +467 -0
data/lib/logstash/outputs/cloudwatch.rb
@@ -0,0 +1,346 @@
+ # encoding: utf-8
+ require "logstash/outputs/base"
+ require "logstash/namespace"
+ require "logstash/plugin_mixins/aws_config"
+
+ require "rufus/scheduler"
+
+ # This output lets you aggregate and send metric data to AWS CloudWatch
+ #
+ # ==== Summary:
+ # This plugin is intended to be used on a logstash indexer agent (but that
+ # is not the only way, see below.) In the intended scenario, one cloudwatch
+ # output plugin is configured, on the logstash indexer node, with just AWS API
+ # credentials, and possibly a region and/or a namespace. The output looks
+ # for fields present in events, and when it finds them, it uses them to
+ # calculate aggregate statistics. If the `metricname` option is set in this
+ # output, then any events which pass through it will be aggregated & sent to
+ # CloudWatch, but that is not recommended. The intended use is to NOT set the
+ # metricname option here, and instead to add a `CW_metricname` field (and other
+ # fields) to only the events you want sent to CloudWatch.
+ #
+ # When events pass through this output they are queued for background
+ # aggregation and sending, which happens every minute by default. The
+ # queue has a maximum size, and when it is full aggregated statistics will be
+ # sent to CloudWatch ahead of schedule. Whenever this happens a warning
+ # message is written to logstash's log. If you see this you should increase
+ # the `queue_size` configuration option to avoid the extra API calls. The queue
+ # is emptied every time we send data to CloudWatch.
+ #
+ # Note: when logstash is stopped the queue is destroyed before it can be processed.
+ # This is a known limitation of logstash and will hopefully be addressed in a
+ # future version.
+ #
+ # ==== Details:
+ # There are two ways to configure this plugin, and they can be used in
+ # combination: event fields & per-output defaults
+ #
+ # Event Field configuration...
+ # You add fields to your events in inputs & filters and this output reads
+ # those fields to aggregate events. The names of the fields read are
+ # configurable via the `field_*` options.
+ #
+ # Per-output defaults...
+ # You set universal defaults in this output plugin's configuration, and
+ # if an event does not have a field for that option then the default is
+ # used.
+ #
+ # Notice, the event fields take precedence over the per-output defaults.
+ #
+ # At a minimum events must have a "metric name" to be sent to CloudWatch.
+ # This can be achieved either by providing a default here OR by adding a
+ # `CW_metricname` field. By default, if no other configuration is provided
+ # besides a metric name, then events will be counted (Unit: Count, Value: 1)
+ # by their metric name (either a default or from their `CW_metricname` field)
+ #
+ # Other fields which can be added to events to modify the behavior of this
+ # plugin are, `CW_namespace`, `CW_unit`, `CW_value`, and
+ # `CW_dimensions`. All of these field names are configurable in
+ # this output. You can also set per-output defaults for any of them.
+ # See below for details.
+ #
+ # Read more about http://aws.amazon.com/cloudwatch/[AWS CloudWatch],
+ # and the specifics of the API endpoint this output uses,
+ # http://docs.amazonwebservices.com/AmazonCloudWatch/latest/APIReference/API_PutMetricData.html[PutMetricData]
+ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base
+   include LogStash::PluginMixins::AwsConfig::V2
+
+   config_name "cloudwatch"
+
+   # Constants
+   # aggregate_key members
+   DIMENSIONS = "dimensions"
+   TIMESTAMP = "timestamp"
+   METRIC = "metric"
+   COUNT = "count"
+   UNIT = "unit"
+   SUM = "sum"
+   MIN = "min"
+   MAX = "max"
+   # Units
+   COUNT_UNIT = "Count"
+   NONE = "None"
+
+   # How often to send data to CloudWatch
+   # This does not affect the event timestamps, events will always have their
+   # actual timestamp (to-the-minute) sent to CloudWatch.
+   #
+   # We only call the API if there is data to send.
+   #
+   # See the Rufus Scheduler docs for an https://github.com/jmettraux/rufus-scheduler#the-time-strings-understood-by-rufus-scheduler[explanation of allowed values]
+   config :timeframe, :validate => :string, :default => "1m"
+
+   # How many events to queue before forcing a call to the CloudWatch API ahead of the `timeframe` schedule.
+   # Set this to the number of events-per-timeframe you will be sending to CloudWatch to avoid extra API calls.
+   config :queue_size, :validate => :number, :default => 10000
+
+   # How many data points can be given in one call to the CloudWatch API
+   config :batch_size, :validate => :number, :default => 20
+
+   # The default namespace to use for events which do not have a `CW_namespace` field
+   config :namespace, :validate => :string, :default => "Logstash"
+
+   # The name of the field used to set a different namespace per event
+   # Note: Only one namespace can be sent to CloudWatch per API call
+   # so setting different namespaces will increase the number of API calls
+   # and those cost money.
+   config :field_namespace, :validate => :string, :default => "CW_namespace"
+
+   # The default metric name to use for events which do not have a `CW_metricname` field.
+   # Beware: If this is provided then all events which pass through this output will be aggregated and
+   # sent to CloudWatch, so use this carefully. Furthermore, when providing this option, you
+   # will probably want to also restrict events from passing through this output using event
+   # type, tag, and field matching
+   config :metricname, :validate => :string
+
+   # The name of the field used to set the metric name on an event
+   # The author of this plugin recommends adding this field to events in inputs &
+   # filters rather than using the per-output default setting so that one output
+   # plugin on your logstash indexer can serve all events (which of course had
+   # fields set on your logstash shippers.)
+   config :field_metricname, :validate => :string, :default => "CW_metricname"
+
+   VALID_UNITS = ["Seconds", "Microseconds", "Milliseconds", "Bytes",
+                  "Kilobytes", "Megabytes", "Gigabytes", "Terabytes",
+                  "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits",
+                  "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second",
+                  "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second",
+                  "Bits/Second", "Kilobits/Second", "Megabits/Second",
+                  "Gigabits/Second", "Terabits/Second", "Count/Second", NONE]
+
+   # The default unit to use for events which do not have a `CW_unit` field
+   # If you set this option you should probably set the "value" option along with it
+   config :unit, :validate => VALID_UNITS, :default => COUNT_UNIT
+
+   # The name of the field used to set the unit on an event metric
+   config :field_unit, :validate => :string, :default => "CW_unit"
+
+   # The default value to use for events which do not have a `CW_value` field
+   # If provided, this must be a string which can be converted to a float, for example...
+   # "1", "2.34", ".5", and "0.67"
+   # If you set this option you should probably set the `unit` option along with it
+   config :value, :validate => :string, :default => "1"
+
+   # The name of the field used to set the value (float) on an event metric
+   config :field_value, :validate => :string, :default => "CW_value"
+
+   # The default dimensions [ name, value, ... ] to use for events which do not have a `CW_dimensions` field
+   config :dimensions, :validate => :hash
+
+   # The name of the field used to set the dimensions on an event metric
+   # The field named here, if present in an event, must have an array of
+   # one or more key & value pairs, for example...
+   # `add_field => [ "CW_dimensions", "Environment", "CW_dimensions", "prod" ]`
+   # or, equivalently...
+   # `add_field => [ "CW_dimensions", "Environment" ]`
+   # `add_field => [ "CW_dimensions", "prod" ]`
+   config :field_dimensions, :validate => :string, :default => "CW_dimensions"
+
+   attr_reader :event_queue
+
+   public
+   def register
+     require "thread"
+     require "aws-sdk-cloudwatch"
+
+     @cw = Aws::CloudWatch::Client.new(aws_options_hash)
+
+     @event_queue = SizedQueue.new(@queue_size)
+     @scheduler = Rufus::Scheduler.new
+     @job = @scheduler.schedule_every @timeframe do
+       @logger.debug("Scheduler Activated")
+       publish(aggregate({}))
+     end
+   end # def register
+
+   # Rufus::Scheduler >= 3.4 moved the Time impl into a gem: `EoTime = ::EtOrbi::EoTime`
+   # Rufus::Scheduler 3.1 - 3.3 used its own Time impl, `Rufus::Scheduler::ZoTime`
+   RufusTimeImpl = defined?(Rufus::Scheduler::EoTime) ? Rufus::Scheduler::EoTime :
+                       (defined?(Rufus::Scheduler::ZoTime) ? Rufus::Scheduler::ZoTime : ::Time)
+
+   public
+   def receive(event)
+     return unless (event.get(@field_metricname) || @metricname)
+
+     if (@event_queue.length >= @event_queue.max)
+       @job.trigger RufusTimeImpl.now
+       @logger.warn("Posted to AWS CloudWatch ahead of schedule. If you see this often, consider increasing the cloudwatch queue_size option.")
+     end
+
+     @logger.debug("Queueing event", :event => event)
+     @event_queue << event
+   end # def receive
+
+   private
+   def publish(aggregates)
+     aggregates.each do |namespace, data|
+       @logger.debug("Namespace, data: ", :namespace => namespace, :data => data)
+       metric_data = []
+       data.each do |aggregate_key, stats|
+         new_data = {
+           :metric_name => aggregate_key[METRIC],
+           :timestamp => aggregate_key[TIMESTAMP],
+           :unit => aggregate_key[UNIT],
+           :statistic_values => {
+             :sample_count => stats[COUNT],
+             :sum => stats[SUM],
+             :minimum => stats[MIN],
+             :maximum => stats[MAX],
+           }
+         }
+         dims = aggregate_key[DIMENSIONS]
+         if (dims.is_a?(Array) && dims.length > 0 && (dims.length % 2) == 0)
+           new_data[:dimensions] = Array.new
+           i = 0
+           while (i < dims.length)
+             new_data[:dimensions] << {:name => dims[i], :value => dims[i+1]}
+             i += 2
+           end
+         end
+         metric_data << new_data
+       end # data.each
+
+       metric_data.each_slice(@batch_size) do |batch|
+         begin
+           @cw.put_metric_data(
+             :namespace => namespace,
+             :metric_data => batch
+           )
+           @logger.debug("Sent data to AWS CloudWatch OK", :namespace => namespace, :metric_data => batch)
+         rescue Exception => e
+           @logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => batch)
+           break
+         end
+       end
+     end # aggregates.each
+     return aggregates
+   end # def publish
+
+   private
+   def aggregate(aggregates)
+     @logger.debug("QUEUE SIZE ", :queuesize => @event_queue.size)
+     while !@event_queue.empty? do
+       begin
+         count(aggregates, @event_queue.pop(true))
+       rescue Exception => e
+         @logger.warn("Exception! Breaking count loop", :exception => e)
+         break
+       end
+     end
+     return aggregates
+   end # def aggregate
+
+   private
+   def count(aggregates, event)
+     # If the event doesn't declare a namespace, use the default
+     fnamespace = field(event, @field_namespace)
+     namespace = (fnamespace ? fnamespace : event.sprintf(@namespace))
+
+     funit = field(event, @field_unit)
+     unit = (funit ? funit : event.sprintf(@unit))
+
+     fvalue = field(event, @field_value)
+     value = (fvalue ? fvalue : event.sprintf(@value))
+
+     # We may get to this point with valid Units but missing value. Send zeros.
+     val = (!value) ? 0.0 : value.to_f
+
+     # Event provides exactly one (but not both) of value or unit
+     if ( (fvalue == nil) ^ (funit == nil) )
+       @logger.warn("Likely config error: event has one of #{@field_value} or #{@field_unit} fields but not both.", :event => event)
+     end
+
+     # If Unit is still not set or is invalid warn about misconfiguration & use NONE
+     if (!VALID_UNITS.include?(unit))
+       unit = NONE
+       @logger.warn("Likely config error: invalid or missing Units (#{unit.to_s}), using '#{NONE}' instead", :event => event)
+     end
+
+     if (!aggregates[namespace])
+       aggregates[namespace] = {}
+     end
+
+     dims = event.get(@field_dimensions)
+     if (dims) # event provides dimensions
+       # validate the structure
+       if (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0)
+         @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event)
+         dims = nil
+       end
+       # Best case, we get here and exit the conditional because dims...
+       # - is an array
+       # - with positive length
+       # - and an even number of elements
+     elsif (@dimensions.is_a?(Hash)) # event did not provide dimensions, but the output has been configured with a default
+       dims = @dimensions.flatten.map{|d| event.sprintf(d)} # into the kind of array described just above
+     else
+       dims = nil
+     end
+
+     fmetric = field(event, @field_metricname)
+     aggregate_key = {
+       METRIC => (fmetric ? fmetric : event.sprintf(@metricname)),
+       DIMENSIONS => dims,
+       UNIT => unit,
+       TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}")
+     }
+
+     if (!aggregates[namespace][aggregate_key])
+       aggregates[namespace][aggregate_key] = {}
+     end
+
+     if (!aggregates[namespace][aggregate_key][MAX] || val > aggregates[namespace][aggregate_key][MAX])
+       aggregates[namespace][aggregate_key][MAX] = val
+     end
+
+     if (!aggregates[namespace][aggregate_key][MIN] || val < aggregates[namespace][aggregate_key][MIN])
+       aggregates[namespace][aggregate_key][MIN] = val
+     end
+
+     if (!aggregates[namespace][aggregate_key][COUNT])
+       aggregates[namespace][aggregate_key][COUNT] = 1
+     else
+       aggregates[namespace][aggregate_key][COUNT] += 1
+     end
+
+     if (!aggregates[namespace][aggregate_key][SUM])
+       aggregates[namespace][aggregate_key][SUM] = val
+     else
+       aggregates[namespace][aggregate_key][SUM] += val
+     end
+   end # def count
+
+   private
+   def field(event, fieldname)
+     if !event.get(fieldname)
+       return nil
+     else
+       if event.get(fieldname).is_a?(Array)
+         return event.get(fieldname).first
+       else
+         return event.get(fieldname)
+       end
+     end
+   end # def field
+
+ end # class LogStash::Outputs::CloudWatch
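
The doc comment above describes how events are folded into per-minute aggregate statistics keyed by metric name, unit, dimensions, and timestamp. A minimal sketch of that fold in plain Ruby, assuming hash literals in place of Logstash events and the default CW_* field names (the events and values here are made up for illustration):

    # Each distinct (metric, unit, minute) key accumulates count/sum/min/max,
    # which is what put_metric_data later receives as :statistic_values.
    events = [
      { "CW_metricname" => "Latency", "CW_unit" => "Milliseconds", "CW_value" => "12.5" },
      { "CW_metricname" => "Latency", "CW_unit" => "Milliseconds", "CW_value" => "7.0" },
    ]

    stats = Hash.new { |h, k| h[k] = { "count" => 0, "sum" => 0.0, "min" => nil, "max" => nil } }
    events.each do |e|
      key = [e["CW_metricname"], e["CW_unit"], Time.now.strftime("%Y-%m-%dT%H:%M:00Z")]
      s = stats[key]
      v = e["CW_value"].to_f
      s["count"] += 1
      s["sum"]   += v
      s["min"]    = v if s["min"].nil? || v < s["min"]
      s["max"]    = v if s["max"].nil? || v > s["max"]
    end
    # => one StatisticSet per key: {"count"=>2, "sum"=>19.5, "min"=>7.0, "max"=>12.5}
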
data/lib/logstash/outputs/s3/file_repository.rb
@@ -0,0 +1,121 @@
+ # encoding: utf-8
+ require "java"
+ require "concurrent"
+ require "concurrent/timer_task"
+ require "logstash/util"
+
+ ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap
+
+ module LogStash
+   module Outputs
+     class S3
+       class FileRepository
+         DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
+         DEFAULT_STALE_TIME_SECS = 15 * 60
+         # Ensure that all access or work done
+         # on a factory is threadsafe
+         class PrefixedValue
+           def initialize(file_factory, stale_time)
+             @file_factory = file_factory
+             @lock = Mutex.new
+             @stale_time = stale_time
+           end
+
+           def with_lock
+             @lock.synchronize {
+               yield @file_factory
+             }
+           end
+
+           def stale?
+             with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
+           end
+
+           def apply(prefix)
+             return self
+           end
+
+           def delete!
+             with_lock { |factory| factory.current.delete! }
+           end
+         end
+
+         class FactoryInitializer
+           include java.util.function.Function
+           def initialize(tags, encoding, temporary_directory, stale_time)
+             @tags = tags
+             @encoding = encoding
+             @temporary_directory = temporary_directory
+             @stale_time = stale_time
+           end
+
+           def apply(prefix_key)
+             PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
+           end
+         end
+
+         def initialize(tags, encoding, temporary_directory,
+                        stale_time = DEFAULT_STALE_TIME_SECS,
+                        sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
+           # The path needs to contain the prefix so when we start
+           # logstash after a crash we keep the remote structure
+           @prefixed_factories = ConcurrentHashMap.new
+
+           @sweeper_interval = sweeper_interval
+
+           @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)
+
+           start_stale_sweeper
+         end
+
+         def keys
+           @prefixed_factories.keySet
+         end
+
+         def each_files
+           @prefixed_factories.elements.each do |prefixed_file|
+             prefixed_file.with_lock { |factory| yield factory.current }
+           end
+         end
+
+         # Return the file factory
+         def get_factory(prefix_key)
+           @prefixed_factories.computeIfAbsent(prefix_key, @factory_initializer).with_lock { |factory| yield factory }
+         end
+
+         def get_file(prefix_key)
+           get_factory(prefix_key) { |factory| yield factory.current }
+         end
+
+         def shutdown
+           stop_stale_sweeper
+         end
+
+         def size
+           @prefixed_factories.size
+         end
+
+         def remove_stale(k, v)
+           if v.stale?
+             @prefixed_factories.remove(k, v)
+             v.delete!
+           end
+         end
+
+         def start_stale_sweeper
+           @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
+             LogStash::Util.set_thread_name("S3, Stale factory sweeper")
+
+             @prefixed_factories.forEach{|k,v| remove_stale(k,v)}
+           end
+
+           @stale_sweeper.execute
+         end
+
+         def stop_stale_sweeper
+           @stale_sweeper.shutdown
+         end
+       end
+     end
+   end
+ end
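
FileRepository maps each S3 key prefix to a TemporaryFileFactory, creating entries atomically via ConcurrentHashMap#computeIfAbsent and sweeping idle ones on a timer. A hedged sketch of the calling pattern (not standalone; it assumes the gem is loaded and that `tags`, `encoding`, and `temporary_directory` hold the values the s3 output was configured with, and the prefix and payload are made up):

    # Callers never touch a factory directly; get_file/get_factory always
    # run the block while holding the per-prefix mutex.
    repository = LogStash::Outputs::S3::FileRepository.new(tags, encoding, temporary_directory)

    repository.get_file("logs/2022/") do |file|
      file.write("hello\n")   # TemporaryFile delegates #write to the underlying fd
    end

    repository.each_files { |file| puts file.path }  # visits each prefix's current file under its lock
    repository.size       # => number of live prefixes
    repository.shutdown   # stops the stale-factory sweeper
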
data/lib/logstash/outputs/s3/path_validator.rb
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class PathValidator
+         INVALID_CHARACTERS = "\^`><"
+
+         def self.valid?(name)
+           name.match(matches_re).nil?
+         end
+
+         def self.matches_re
+           /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+         end
+       end
+     end
+   end
+ end
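
PathValidator is a simple character blacklist: a name is valid when it matches none of the blacklisted characters. Note that in a double-quoted Ruby string "\^" is just "^", so the blacklist is effectively ^ ` > <. A short usage sketch (example paths are made up):

    LogStash::Outputs::S3::PathValidator.valid?("logs/2022/app")  # => true
    LogStash::Outputs::S3::PathValidator.valid?("logs/<today>")   # => false, '<' and '>' are rejected
    LogStash::Outputs::S3::PathValidator.valid?("logs/`date`")    # => false, backtick is rejected
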
data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
+ # encoding: utf-8
+ require "logstash/outputs/s3/size_rotation_policy"
+ require "logstash/outputs/s3/time_rotation_policy"
+
+ module LogStash
+   module Outputs
+     class S3
+       class SizeAndTimeRotationPolicy
+         def initialize(file_size, time_file)
+           @size_strategy = SizeRotationPolicy.new(file_size)
+           @time_strategy = TimeRotationPolicy.new(time_file)
+         end
+
+         def rotate?(file)
+           @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
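
This policy is a plain OR of the two strategies it wraps, and `needs_periodic?` is true because the time threshold can expire with no writes, so the output must also check it on a timer. A hedged sketch, assuming (per the s3 output's `time_file` option) that the time argument is in minutes and that TimeRotationPolicy, which is not shown in this section, compares it against the file's age via #ctime:

    require "ostruct"

    # Rotate when either 1 MiB is reached or the file is old enough.
    policy = LogStash::Outputs::S3::SizeAndTimeRotationPolicy.new(1024 * 1024, 15)

    fresh_small = OpenStruct.new(:size => 10, :ctime => Time.now)  # stand-in for a TemporaryFile
    policy.rotate?(fresh_small)  # => false: under 1 MiB and younger than 15 minutes
    policy.needs_periodic?       # => true: must be polled, not only checked on writes
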
data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class SizeRotationPolicy
+         attr_reader :size_file
+
+         def initialize(size_file)
+           if size_file <= 0
+             raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+           end
+
+           @size_file = size_file
+         end
+
+         def rotate?(file)
+           file.size >= size_file
+         end
+
+         def needs_periodic?
+           false
+         end
+       end
+     end
+   end
+ end
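
SizeRotationPolicy is a pure size threshold; anything responding to #size works, and a non-positive threshold is rejected at construction. A short sketch (assumes the gem is loaded so LogStash::ConfigurationError is defined; the sizes are made up):

    require "ostruct"

    policy = LogStash::Outputs::S3::SizeRotationPolicy.new(1024)
    policy.rotate?(OpenStruct.new(:size => 512))    # => false, below the threshold
    policy.rotate?(OpenStruct.new(:size => 2048))   # => true, at or above the threshold
    LogStash::Outputs::S3::SizeRotationPolicy.new(0) # raises LogStash::ConfigurationError
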
data/lib/logstash/outputs/s3/temporary_file.rb
@@ -0,0 +1,71 @@
+ # encoding: utf-8
+ require "thread"
+ require "forwardable"
+ require "fileutils"
+
+ module LogStash
+   module Outputs
+     class S3
+       # Wraps the actual file descriptor in a utility class.
+       # This makes the code more OOP and the paths easier to reason about.
+       class TemporaryFile
+         extend Forwardable
+
+         def_delegators :@fd, :path, :write, :close, :fsync
+
+         attr_reader :fd
+
+         def initialize(key, fd, temp_path)
+           @fd = fd
+           @key = key
+           @temp_path = temp_path
+           @created_at = Time.now
+         end
+
+         def ctime
+           @created_at
+         end
+
+         def temp_path
+           @temp_path
+         end
+
+         def size
+           # Use the fd size to get an accurate result,
+           # so we don't have to deal with fsync;
+           # if the file is closed we fall back to File::size
+           begin
+             @fd.size
+           rescue IOError
+             ::File.size(path)
+           end
+         end
+
+         def key
+           @key.gsub(/^\//, "")
+         end
+
+         # Each temporary file is made inside a directory named with a UUID;
+         # instead of deleting the file directly, with the risk of deleting other files,
+         # we delete the root of the UUID. Using a UUID also removes the risk of
+         # deleting unwanted files; it acts as a sandbox.
+         def delete!
+           @fd.close rescue IOError # force close anyway
+           FileUtils.rm_r(@temp_path, :secure => true)
+         end
+
+         def empty?
+           size == 0
+         end
+
+         def self.create_from_existing_file(file_path, temporary_folder)
+           key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+           TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                             ::File.open(file_path, "r"),
+                             ::File.join(temporary_folder, key_parts.slice(0, 1)))
+         end
+       end
+     end
+   end
+ end
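
create_from_existing_file recovers crash-leftover files by reading the on-disk layout <temporary_folder>/<uuid>/<key parts...>: the first path component is the UUID sandbox (the root that delete! removes) and the rest is the S3 key. A minimal sketch of that derivation using only the standard library (the paths are made up):

    require "pathname"

    file_path        = "/tmp/logstash/8b37b0b1/logs/2022/ls.s3.log"
    temporary_folder = "/tmp/logstash"

    key_parts = Pathname.new(file_path)
                        .relative_path_from(Pathname.new(temporary_folder))
                        .to_s.split(::File::SEPARATOR)
    # => ["8b37b0b1", "logs", "2022", "ls.s3.log"]

    key_parts.slice(1, key_parts.size).join("/")          # => "logs/2022/ls.s3.log", the S3 key
    ::File.join(temporary_folder, key_parts.slice(0, 1))  # => "/tmp/logstash/8b37b0b1", the delete! sandbox root
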