logstash-integration-aws 7.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +33 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/VERSION +1 -0
  10. data/docs/codec-cloudfront.asciidoc +53 -0
  11. data/docs/codec-cloudtrail.asciidoc +45 -0
  12. data/docs/index.asciidoc +36 -0
  13. data/docs/input-cloudwatch.asciidoc +320 -0
  14. data/docs/input-s3.asciidoc +346 -0
  15. data/docs/input-sqs.asciidoc +287 -0
  16. data/docs/output-cloudwatch.asciidoc +321 -0
  17. data/docs/output-s3.asciidoc +442 -0
  18. data/docs/output-sns.asciidoc +166 -0
  19. data/docs/output-sqs.asciidoc +242 -0
  20. data/lib/logstash/codecs/cloudfront.rb +84 -0
  21. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  22. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  23. data/lib/logstash/inputs/s3.rb +466 -0
  24. data/lib/logstash/inputs/sqs.rb +196 -0
  25. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  26. data/lib/logstash/outputs/s3/file_repository.rb +193 -0
  27. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  28. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  29. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  30. data/lib/logstash/outputs/s3/temporary_file.rb +114 -0
  31. data/lib/logstash/outputs/s3/temporary_file_factory.rb +126 -0
  32. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  33. data/lib/logstash/outputs/s3/uploader.rb +76 -0
  34. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  35. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  36. data/lib/logstash/outputs/s3.rb +442 -0
  37. data/lib/logstash/outputs/sns.rb +133 -0
  38. data/lib/logstash/outputs/sqs.rb +167 -0
  39. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  40. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  41. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  42. data/lib/logstash-integration-aws_jars.rb +4 -0
  43. data/lib/tasks/build.rake +15 -0
  44. data/logstash-integration-aws.gemspec +55 -0
  45. data/spec/codecs/cloudfront_spec.rb +92 -0
  46. data/spec/codecs/cloudtrail_spec.rb +56 -0
  47. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  48. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  49. data/spec/fixtures/cloudfront.log +4 -0
  50. data/spec/fixtures/compressed.log.gee.zip +0 -0
  51. data/spec/fixtures/compressed.log.gz +0 -0
  52. data/spec/fixtures/compressed.log.gzip +0 -0
  53. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  54. data/spec/fixtures/json.log +2 -0
  55. data/spec/fixtures/json_with_message.log +2 -0
  56. data/spec/fixtures/multiline.log +6 -0
  57. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  58. data/spec/fixtures/uncompressed.log +2 -0
  59. data/spec/inputs/cloudwatch_spec.rb +85 -0
  60. data/spec/inputs/s3_spec.rb +610 -0
  61. data/spec/inputs/sincedb_spec.rb +17 -0
  62. data/spec/inputs/sqs_spec.rb +324 -0
  63. data/spec/integration/cloudwatch_spec.rb +25 -0
  64. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  65. data/spec/integration/gzip_file_spec.rb +62 -0
  66. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  67. data/spec/integration/outputs/sqs_spec.rb +98 -0
  68. data/spec/integration/restore_from_crash_spec.rb +133 -0
  69. data/spec/integration/s3_spec.rb +66 -0
  70. data/spec/integration/size_rotation_spec.rb +59 -0
  71. data/spec/integration/sqs_spec.rb +110 -0
  72. data/spec/integration/stress_test_spec.rb +60 -0
  73. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  74. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  75. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  76. data/spec/outputs/cloudwatch_spec.rb +38 -0
  77. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  78. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  79. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  80. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  81. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  82. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  83. data/spec/outputs/s3/uploader_spec.rb +69 -0
  84. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  85. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  86. data/spec/outputs/s3_spec.rb +232 -0
  87. data/spec/outputs/sns_spec.rb +160 -0
  88. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  89. data/spec/spec_helper.rb +8 -0
  90. data/spec/support/helpers.rb +121 -0
  91. data/spec/unit/outputs/sqs_spec.rb +247 -0
  92. data/vendor/jar-dependencies/org/logstash/plugins/integration/aws/logstash-integration-aws/7.1.1/logstash-integration-aws-7.1.1.jar +0 -0
  93. metadata +472 -0
data/lib/logstash/outputs/cloudwatch.rb
@@ -0,0 +1,346 @@
+ # encoding: utf-8
+ require "logstash/outputs/base"
+ require "logstash/namespace"
+ require "logstash/plugin_mixins/aws_config"
+
+ require "rufus/scheduler"
+
+ # This output lets you aggregate and send metric data to AWS CloudWatch
+ #
+ # ==== Summary:
+ # This plugin is intended to be used on a logstash indexer agent (but that
+ # is not the only way, see below.) In the intended scenario, one cloudwatch
+ # output plugin is configured, on the logstash indexer node, with just AWS API
+ # credentials, and possibly a region and/or a namespace. The output looks
+ # for fields present in events, and when it finds them, it uses them to
+ # calculate aggregate statistics. If the `metricname` option is set in this
+ # output, then any events which pass through it will be aggregated & sent to
+ # CloudWatch, but that is not recommended. The intended use is to NOT set the
+ # metricname option here, and instead to add a `CW_metricname` field (and other
+ # fields) to only the events you want sent to CloudWatch.
+ #
+ # When events pass through this output they are queued for background
+ # aggregation and sending, which happens every minute by default. The
+ # queue has a maximum size, and when it is full aggregated statistics will be
+ # sent to CloudWatch ahead of schedule. Whenever this happens a warning
+ # message is written to logstash's log. If you see this you should increase
+ # the `queue_size` configuration option to avoid the extra API calls. The queue
+ # is emptied every time we send data to CloudWatch.
+ #
+ # Note: when logstash is stopped the queue is destroyed before it can be processed.
+ # This is a known limitation of logstash and will hopefully be addressed in a
+ # future version.
+ #
+ # ==== Details:
+ # There are two ways to configure this plugin, and they can be used in
+ # combination: event fields & per-output defaults
+ #
+ # Event Field configuration...
+ # You add fields to your events in inputs & filters and this output reads
+ # those fields to aggregate events. The names of the fields read are
+ # configurable via the `field_*` options.
+ #
+ # Per-output defaults...
+ # You set universal defaults in this output plugin's configuration, and
+ # if an event does not have a field for that option then the default is
+ # used.
+ #
+ # Note: the event fields take precedence over the per-output defaults.
+ #
+ # At a minimum events must have a "metric name" to be sent to CloudWatch.
+ # This can be achieved either by providing a default here OR by adding a
+ # `CW_metricname` field. By default, if no other configuration is provided
+ # besides a metric name, then events will be counted (Unit: Count, Value: 1)
+ # by their metric name (either a default or from their `CW_metricname` field)
+ #
+ # Other fields which can be added to events to modify the behavior of this
+ # plugin are `CW_namespace`, `CW_unit`, `CW_value`, and
+ # `CW_dimensions`. All of these field names are configurable in
+ # this output. You can also set per-output defaults for any of them.
+ # See below for details.
+ #
+ # Read more about http://aws.amazon.com/cloudwatch/[AWS CloudWatch],
+ # and the specifics of the API endpoint this output uses,
+ # http://docs.amazonwebservices.com/AmazonCloudWatch/latest/APIReference/API_PutMetricData.html[PutMetricData]
+ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base
+   include LogStash::PluginMixins::AwsConfig::V2
+
+   config_name "cloudwatch"
+
+   # Constants
+   # aggregate_key members
+   DIMENSIONS = "dimensions"
+   TIMESTAMP = "timestamp"
+   METRIC = "metric"
+   COUNT = "count"
+   UNIT = "unit"
+   SUM = "sum"
+   MIN = "min"
+   MAX = "max"
+   # Units
+   COUNT_UNIT = "Count"
+   NONE = "None"
+
+   # How often to send data to CloudWatch
+   # This does not affect the event timestamps, events will always have their
+   # actual timestamp (to-the-minute) sent to CloudWatch.
+   #
+   # We only call the API if there is data to send.
+   #
+   # See the Rufus Scheduler docs for an https://github.com/jmettraux/rufus-scheduler#the-time-strings-understood-by-rufus-scheduler[explanation of allowed values]
+   config :timeframe, :validate => :string, :default => "1m"
+
+   # How many events to queue before forcing a call to the CloudWatch API ahead of `timeframe` schedule
+   # Set this to the number of events-per-timeframe you will be sending to CloudWatch to avoid extra API calls
+   config :queue_size, :validate => :number, :default => 10000
+
+   # How many data points can be given in one call to the CloudWatch API
+   config :batch_size, :validate => :number, :default => 20
+
+   # The default namespace to use for events which do not have a `CW_namespace` field
+   config :namespace, :validate => :string, :default => "Logstash"
+
+   # The name of the field used to set a different namespace per event
+   # Note: Only one namespace can be sent to CloudWatch per API call
+   # so setting different namespaces will increase the number of API calls
+   # and those cost money.
+   config :field_namespace, :validate => :string, :default => "CW_namespace"
+
+   # The default metric name to use for events which do not have a `CW_metricname` field.
+   # Beware: If this is provided then all events which pass through this output will be aggregated and
+   # sent to CloudWatch, so use this carefully. Furthermore, when providing this option, you
+   # will probably want to also restrict events from passing through this output using event
+   # type, tag, and field matching
+   config :metricname, :validate => :string
+
+   # The name of the field used to set the metric name on an event
+   # The author of this plugin recommends adding this field to events in inputs &
+   # filters rather than using the per-output default setting so that one output
+   # plugin on your logstash indexer can serve all events (which of course had
+   # fields set on your logstash shippers.)
+   config :field_metricname, :validate => :string, :default => "CW_metricname"
+
+   VALID_UNITS = ["Seconds", "Microseconds", "Milliseconds", "Bytes",
+                  "Kilobytes", "Megabytes", "Gigabytes", "Terabytes",
+                  "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits",
+                  "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second",
+                  "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second",
+                  "Bits/Second", "Kilobits/Second", "Megabits/Second",
+                  "Gigabits/Second", "Terabits/Second", "Count/Second", NONE]
+
+   # The default unit to use for events which do not have a `CW_unit` field
+   # If you set this option you should probably set the "value" option along with it
+   config :unit, :validate => VALID_UNITS, :default => COUNT_UNIT
+
+   # The name of the field used to set the unit on an event metric
+   config :field_unit, :validate => :string, :default => "CW_unit"
+
+   # The default value to use for events which do not have a `CW_value` field
+   # If provided, this must be a string which can be converted to a float, for example...
+   # "1", "2.34", ".5", and "0.67"
+   # If you set this option you should probably set the `unit` option along with it
+   config :value, :validate => :string, :default => "1"
+
+   # The name of the field used to set the value (float) on an event metric
+   config :field_value, :validate => :string, :default => "CW_value"
+
+   # The default dimensions [ name, value, ... ] to use for events which do not have a `CW_dimensions` field
+   config :dimensions, :validate => :hash
+
+   # The name of the field used to set the dimensions on an event metric
+   # The field named here, if present in an event, must have an array of
+   # one or more key & value pairs, for example...
+   # `add_field => [ "CW_dimensions", "Environment", "CW_dimensions", "prod" ]`
+   # or, equivalently...
+   # `add_field => [ "CW_dimensions", "Environment" ]`
+   # `add_field => [ "CW_dimensions", "prod" ]`
+   config :field_dimensions, :validate => :string, :default => "CW_dimensions"
+
+   attr_reader :event_queue
+
+   public
+   def register
+     require "thread"
+     require "aws-sdk-cloudwatch"
+
+     @cw = Aws::CloudWatch::Client.new(aws_options_hash)
+
+     @event_queue = SizedQueue.new(@queue_size)
+     @scheduler = Rufus::Scheduler.new
+     @job = @scheduler.schedule_every @timeframe do
+       @logger.debug("Scheduler Activated")
+       publish(aggregate({}))
+     end
+   end # def register
+
+   # Rufus::Scheduler >= 3.4 moved the Time impl into a gem: `EoTime = ::EtOrbi::EoTime`
+   # Rufus::Scheduler 3.1 - 3.3 used its own Time impl, `Rufus::Scheduler::ZoTime`
+   RufusTimeImpl = defined?(Rufus::Scheduler::EoTime) ? Rufus::Scheduler::EoTime :
+                     (defined?(Rufus::Scheduler::ZoTime) ? Rufus::Scheduler::ZoTime : ::Time)
+
+   public
+   def receive(event)
+     return unless (event.get(@field_metricname) || @metricname)
+
+     if (@event_queue.length >= @event_queue.max)
+       @job.trigger RufusTimeImpl.now
+       @logger.warn("Posted to AWS CloudWatch ahead of schedule. If you see this often, consider increasing the cloudwatch queue_size option.")
+     end
+
+     @logger.debug("Queueing event", :event => event)
+     @event_queue << event
+   end # def receive
+
+   private
+   def publish(aggregates)
+     aggregates.each do |namespace, data|
+       @logger.debug("Namespace, data: ", :namespace => namespace, :data => data)
+       metric_data = []
+       data.each do |aggregate_key, stats|
+         new_data = {
+           :metric_name => aggregate_key[METRIC],
+           :timestamp => aggregate_key[TIMESTAMP],
+           :unit => aggregate_key[UNIT],
+           :statistic_values => {
+             :sample_count => stats[COUNT],
+             :sum => stats[SUM],
+             :minimum => stats[MIN],
+             :maximum => stats[MAX],
+           }
+         }
+         dims = aggregate_key[DIMENSIONS]
+         if (dims.is_a?(Array) && dims.length > 0 && (dims.length % 2) == 0)
+           new_data[:dimensions] = Array.new
+           i = 0
+           while (i < dims.length)
+             new_data[:dimensions] << {:name => dims[i], :value => dims[i+1]}
+             i += 2
+           end
+         end
+         metric_data << new_data
+       end # data.each
+
+       metric_data.each_slice(@batch_size) do |batch|
+         begin
+           @cw.put_metric_data(
+             :namespace => namespace,
+             :metric_data => batch
+           )
+           @logger.debug("Sent data to AWS CloudWatch OK", :namespace => namespace, :metric_data => batch)
+         rescue Exception => e
+           @logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => batch)
+           break
+         end
+       end
+     end # aggregates.each
+     return aggregates
+   end # def publish
+
+   private
+   def aggregate(aggregates)
+     @logger.debug("QUEUE SIZE ", :queuesize => @event_queue.size)
+     while !@event_queue.empty? do
+       begin
+         count(aggregates, @event_queue.pop(true))
+       rescue Exception => e
+         @logger.warn("Exception! Breaking count loop", :exception => e)
+         break
+       end
+     end
+     return aggregates
+   end # def aggregate
+
+   private
+   def count(aggregates, event)
+     # If the event doesn't declare a namespace, use the default
+     fnamespace = field(event, @field_namespace)
+     namespace = (fnamespace ? fnamespace : event.sprintf(@namespace))
+
+     funit = field(event, @field_unit)
+     unit = (funit ? funit : event.sprintf(@unit))
+
+     fvalue = field(event, @field_value)
+     value = (fvalue ? fvalue : event.sprintf(@value))
+
+     # We may get to this point with valid Units but missing value. Send zeros.
+     val = (!value) ? 0.0 : value.to_f
+
+     # Event provides exactly one (but not both) of value or unit
+     if ( (fvalue == nil) ^ (funit == nil) )
+       @logger.warn("Likely config error: event has one of #{@field_value} or #{@field_unit} fields but not both.", :event => event)
+     end
+
+     # If Unit is still not set or is invalid warn about misconfiguration & use NONE
+     if (!VALID_UNITS.include?(unit))
+       unit = NONE
+       @logger.warn("Likely config error: invalid or missing Units (#{unit.to_s}), using '#{NONE}' instead", :event => event)
+     end
+
+     if (!aggregates[namespace])
+       aggregates[namespace] = {}
+     end
+
+     dims = event.get(@field_dimensions)
+     if (dims) # event provides dimensions
+       # validate the structure
+       if (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0)
+         @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event)
+         dims = nil
+       end
+       # Best case, we get here and exit the conditional because dims...
+       # - is an array
+       # - with positive length
+       # - and an even number of elements
+     elsif (@dimensions.is_a?(Hash)) # event did not provide dimensions, but the output has been configured with a default
+       dims = @dimensions.flatten.map{|d| event.sprintf(d)} # into the kind of array described just above
+     else
+       dims = nil
+     end
+
+     fmetric = field(event, @field_metricname)
+     aggregate_key = {
+       METRIC => (fmetric ? fmetric : event.sprintf(@metricname)),
+       DIMENSIONS => dims,
+       UNIT => unit,
+       TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}")
+     }
+
+     if (!aggregates[namespace][aggregate_key])
+       aggregates[namespace][aggregate_key] = {}
+     end
+
+     if (!aggregates[namespace][aggregate_key][MAX] || val > aggregates[namespace][aggregate_key][MAX])
+       aggregates[namespace][aggregate_key][MAX] = val
+     end
+
+     if (!aggregates[namespace][aggregate_key][MIN] || val < aggregates[namespace][aggregate_key][MIN])
+       aggregates[namespace][aggregate_key][MIN] = val
+     end
+
+     if (!aggregates[namespace][aggregate_key][COUNT])
+       aggregates[namespace][aggregate_key][COUNT] = 1
+     else
+       aggregates[namespace][aggregate_key][COUNT] += 1
+     end
+
+     if (!aggregates[namespace][aggregate_key][SUM])
+       aggregates[namespace][aggregate_key][SUM] = val
+     else
+       aggregates[namespace][aggregate_key][SUM] += val
+     end
+   end # def count
+
+   private
+   def field(event, fieldname)
+     if !event.get(fieldname)
+       return nil
+     else
+       if event.get(fieldname).is_a?(Array)
+         return event.get(fieldname).first
+       else
+         return event.get(fieldname)
+       end
+     end
+   end # def field
+
+ end # class LogStash::Outputs::CloudWatch
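
The documentation comments above describe the aggregation only in prose. The short standalone Ruby sketch below (illustrative only, not part of the gem; the two sample events, the "Latency" metric and the "Environment"/"prod" dimension values are invented) shows how events carrying `CW_*` fields collapse into a single `put_metric_data` datum, including how the flat `CW_dimensions` name/value array becomes CloudWatch dimension hashes:

# Hypothetical worked example mirroring the count/publish logic above.
events = [
  { "CW_metricname" => "Latency", "CW_unit" => "Milliseconds", "CW_value" => "12.5",
    "CW_dimensions" => ["Environment", "prod"] },
  { "CW_metricname" => "Latency", "CW_unit" => "Milliseconds", "CW_value" => "7.5",
    "CW_dimensions" => ["Environment", "prod"] },
]

# Aggregate by (metric, unit, dimensions), the same members as the plugin's aggregate_key.
stats = Hash.new { |h, k| h[k] = { count: 0, sum: 0.0, min: nil, max: nil } }
events.each do |e|
  key = [e["CW_metricname"], e["CW_unit"], e["CW_dimensions"]]
  val = e["CW_value"].to_f
  s = stats[key]
  s[:count] += 1
  s[:sum]   += val
  s[:min]    = [s[:min], val].compact.min
  s[:max]    = [s[:max], val].compact.max
end

stats.each do |(metric, unit, dims), s|
  datum = {
    metric_name: metric,
    unit: unit,
    # flat [name, value, name, value, ...] pairs become CloudWatch dimension hashes
    dimensions: dims.each_slice(2).map { |name, value| { name: name, value: value } },
    statistic_values: { sample_count: s[:count], sum: s[:sum], minimum: s[:min], maximum: s[:max] },
  }
  p datum # one datum per aggregate key: sample_count 2, sum 20.0, minimum 7.5, maximum 12.5
end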
data/lib/logstash/outputs/s3/file_repository.rb
@@ -0,0 +1,193 @@
+ # encoding: utf-8
+ require "java"
+ require "concurrent"
+ require "concurrent/timer_task"
+ require "logstash/util"
+
+ ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap
+
+ module LogStash
+   module Outputs
+     class S3
+       class FileRepository
+         DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
+         DEFAULT_STALE_TIME_SECS = 15 * 60
+         # Ensure that all access or work done
+         # on a factory is threadsafe
+         class PrefixedValue
+           def initialize(file_factory, stale_time)
+             @file_factory = file_factory
+             @lock = Monitor.new # reentrant Mutex
+             @stale_time = stale_time
+             @is_deleted = false
+           end
+
+           def with_lock
+             @lock.synchronize {
+               yield @file_factory
+             }
+           end
+
+           def stale?
+             with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
+           end
+
+           def apply(prefix)
+             return self
+           end
+
+           def delete!
+             with_lock do |factory|
+               factory.current.delete!
+               @is_deleted = true
+             end
+           end
+
+           def deleted?
+             with_lock { |_| @is_deleted }
+           end
+         end
+
+         class FactoryInitializer
+           include java.util.function.Function
+           def initialize(tags, encoding, temporary_directory, stale_time)
+             @tags = tags
+             @encoding = encoding
+             @temporary_directory = temporary_directory
+             @stale_time = stale_time
+           end
+
+           def apply(prefix_key)
+             PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
+           end
+         end
+
+         def initialize(tags, encoding, temporary_directory,
+                        stale_time = DEFAULT_STALE_TIME_SECS,
+                        sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
+           # The path needs to contain the prefix so that when we restart
+           # logstash after a crash we keep the remote structure
+           @prefixed_factories = ConcurrentHashMap.new
+
+           @sweeper_interval = sweeper_interval
+
+           @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)
+
+           start_stale_sweeper
+         end
+
+         def keys
+           @prefixed_factories.keySet
+         end
+
+         ##
+         # Yields the current file of each non-deleted file factory while the current thread has exclusive access to it.
+         # @yieldparam file [TemporaryFile]
+         # @return [void]
+         def each_files
+           each_factory(keys) do |factory|
+             yield factory.current
+           end
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Yields the file factory while the current thread has exclusive access to it, creating a new
+         # one if one does not exist or if the current one is being reaped by the stale watcher.
+         # @param prefix_key [String]: the prefix key
+         # @yieldparam factory [TemporaryFileFactory]: a temporary file factory that this thread has exclusive access to
+         # @return [void]
+         def get_factory(prefix_key)
+           # fast-path: if factory exists and is not deleted, yield it with exclusive access and return
+           prefix_val = @prefixed_factories.get(prefix_key)
+           prefix_val&.with_lock do |factory|
+             # intentional local-jump to ensure deletion detection
+             # is done inside the exclusive access.
+             unless prefix_val.deleted?
+               yield(factory)
+               return nil # void return to avoid leaking unsynchronized access
+             end
+           end
+
+           # slow-path:
+           # the ConcurrentHashMap#get operation is lock-free, but may have returned an entry that was being deleted by
+           # another thread (such as via stale detection). If we failed to retrieve a value, or retrieved one that had
+           # been marked deleted, use the atomic ConcurrentHashMap#compute to retrieve a non-deleted entry.
+           prefix_val = @prefixed_factories.compute(prefix_key) do |_, existing|
+             existing && !existing.deleted? ? existing : @factory_initializer.apply(prefix_key)
+           end
+           prefix_val.with_lock { |factory| yield factory }
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Yields each non-deleted file factory while the current thread has exclusive access to it.
+         # @param prefixes [Array<String>]: the prefix keys
+         # @yieldparam factory [TemporaryFileFactory]
+         # @return [void]
+         def each_factory(prefixes)
+           prefixes.each do |prefix_key|
+             prefix_val = @prefixed_factories.get(prefix_key)
+             prefix_val&.with_lock do |factory|
+               yield factory unless prefix_val.deleted?
+             end
+           end
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         ##
+         # Ensures that a non-deleted factory exists for the provided prefix and yields its current file
+         # while the current thread has exclusive access to it.
+         # @param prefix_key [String]
+         # @yieldparam file [TemporaryFile]
+         # @return [void]
+         def get_file(prefix_key)
+           get_factory(prefix_key) { |factory| yield factory.current }
+           nil # void return to avoid leaking unsynchronized access
+         end
+
+         def shutdown
+           stop_stale_sweeper
+         end
+
+         def size
+           @prefixed_factories.size
+         end
+
+         def remove_if_stale(prefix_key)
+           # we use the atomic `ConcurrentHashMap#computeIfPresent` to
+           # detect the staleness, mark a stale prefixed factory as deleted, and delete it from the map.
+           @prefixed_factories.computeIfPresent(prefix_key) do |_, prefixed_factory|
+             # once we have retrieved an instance, we acquire exclusive access to it
+             # for stale detection, marking it as deleted before releasing the lock
+             # and causing it to become deleted from the map.
+             prefixed_factory.with_lock do |_|
+               if prefixed_factory.stale?
+                 prefixed_factory.delete! # mark deleted to prevent reuse
+                 nil # cause deletion
+               else
+                 prefixed_factory # keep existing
+               end
+             end
+           end
+         end
+
+         def start_stale_sweeper
+           @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
+             LogStash::Util.set_thread_name("S3, Stale factory sweeper")
+
+             @prefixed_factories.keys.each do |prefix|
+               remove_if_stale(prefix)
+             end
+           end
+
+           @stale_sweeper.execute
+         end
+
+         def stop_stale_sweeper
+           @stale_sweeper.shutdown
+         end
+       end
+     end
+   end
+ end
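
A hedged usage sketch of the repository's locking contract follows. It assumes JRuby plus the plugin's own classes (`TemporaryFileFactory`, `TemporaryFile`) on the load path; the empty tag list, "none" encoding, temporary directory and "mylogs/" prefix are invented example values:

require "logstash/outputs/s3/file_repository"
require "logstash/outputs/s3/temporary_file_factory"

repo = LogStash::Outputs::S3::FileRepository.new([], "none", "/tmp/logstash-s3")

# Appends happen inside the per-prefix monitor that get_file/get_factory acquire.
repo.get_file("mylogs/") { |file| file.write("hello world\n") }

# Rotation checks and uploads walk the files the same way, never touching a
# file outside its factory's lock.
repo.each_files { |file| puts "#{file.path} => #{file.size} bytes" }

repo.shutdown # stops the stale-factory sweeper

Every temporary file is reached through `get_file`, `get_factory` or `each_files`, so all reads and writes happen under the per-prefix `Monitor` owned by `PrefixedValue`, while the map itself is only mutated through atomic `ConcurrentHashMap` operations.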
data/lib/logstash/outputs/s3/path_validator.rb
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class PathValidator
+         INVALID_CHARACTERS = "\^`><"
+
+         def self.valid?(name)
+           name.match(matches_re).nil?
+         end
+
+         def self.matches_re
+           /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+         end
+       end
+     end
+   end
+ end
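
For reference, a hypothetical check against the validator above. `INVALID_CHARACTERS` expands to the four characters ^ ` > <, so a prefix is valid as long as it contains none of them (the sample prefixes are invented):

require "logstash/outputs/s3/path_validator"

LogStash::Outputs::S3::PathValidator.valid?("logs/%{+YYYY}/")  # => true
LogStash::Outputs::S3::PathValidator.valid?("logs/`whoami`/")  # => false (contains a backtick)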
data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
+ # encoding: utf-8
+ require "logstash/outputs/s3/size_rotation_policy"
+ require "logstash/outputs/s3/time_rotation_policy"
+
+ module LogStash
+   module Outputs
+     class S3
+       class SizeAndTimeRotationPolicy
+         def initialize(file_size, time_file)
+           @size_strategy = SizeRotationPolicy.new(file_size)
+           @time_strategy = TimeRotationPolicy.new(time_file)
+         end
+
+         def rotate?(file)
+           @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class SizeRotationPolicy
+         attr_reader :size_file
+
+         def initialize(size_file)
+           if size_file <= 0
+             raise LogStash::ConfigurationError, "`size_file` need to be greater than 0"
+           end
+
+           @size_file = size_file
+         end
+
+         def rotate?(file)
+           file.size >= size_file
+         end
+
+         def needs_periodic?
+           false
+         end
+       end
+     end
+   end
+ end
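
A small, hypothetical illustration of the size-based policy above: the `Struct` stands in for a `TemporaryFile`, which only needs to respond to `#size` here, and the 1 MiB threshold is an arbitrary example value:

require "logstash/outputs/s3/size_rotation_policy"

FakeFile = Struct.new(:size)
policy = LogStash::Outputs::S3::SizeRotationPolicy.new(1024 * 1024) # rotate once the file reaches this many bytes

policy.rotate?(FakeFile.new(10))              # => false
policy.rotate?(FakeFile.new(5 * 1024 * 1024)) # => true
policy.needs_periodic?                        # => false; a size-only policy needs no periodic rotation check

`SizeAndTimeRotationPolicy` composes this with `TimeRotationPolicy` (not shown in this diff), so its `rotate?` fires when either threshold is crossed and `needs_periodic?` stays true.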