logstash-integration-aws 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/docs/codec-cloudfront.asciidoc +53 -0
  10. data/docs/codec-cloudtrail.asciidoc +45 -0
  11. data/docs/index.asciidoc +38 -0
  12. data/docs/input-cloudwatch.asciidoc +320 -0
  13. data/docs/input-s3.asciidoc +346 -0
  14. data/docs/input-sqs.asciidoc +287 -0
  15. data/docs/output-cloudwatch.asciidoc +321 -0
  16. data/docs/output-s3.asciidoc +442 -0
  17. data/docs/output-sns.asciidoc +166 -0
  18. data/docs/output-sqs.asciidoc +242 -0
  19. data/lib/logstash/codecs/cloudfront.rb +84 -0
  20. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  21. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  22. data/lib/logstash/inputs/s3.rb +466 -0
  23. data/lib/logstash/inputs/sqs.rb +196 -0
  24. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  25. data/lib/logstash/outputs/s3/file_repository.rb +121 -0
  26. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  27. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  28. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  29. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  30. data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
  31. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  32. data/lib/logstash/outputs/s3/uploader.rb +74 -0
  33. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  34. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  35. data/lib/logstash/outputs/s3.rb +405 -0
  36. data/lib/logstash/outputs/sns.rb +133 -0
  37. data/lib/logstash/outputs/sqs.rb +167 -0
  38. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  39. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  40. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  41. data/logstash-integration-aws.gemspec +52 -0
  42. data/spec/codecs/cloudfront_spec.rb +92 -0
  43. data/spec/codecs/cloudtrail_spec.rb +56 -0
  44. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  45. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  46. data/spec/fixtures/cloudfront.log +4 -0
  47. data/spec/fixtures/compressed.log.gee.zip +0 -0
  48. data/spec/fixtures/compressed.log.gz +0 -0
  49. data/spec/fixtures/compressed.log.gzip +0 -0
  50. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  51. data/spec/fixtures/json.log +2 -0
  52. data/spec/fixtures/json_with_message.log +2 -0
  53. data/spec/fixtures/multiline.log +6 -0
  54. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  55. data/spec/fixtures/uncompressed.log +2 -0
  56. data/spec/inputs/cloudwatch_spec.rb +85 -0
  57. data/spec/inputs/s3_spec.rb +610 -0
  58. data/spec/inputs/sincedb_spec.rb +17 -0
  59. data/spec/inputs/sqs_spec.rb +324 -0
  60. data/spec/integration/cloudwatch_spec.rb +25 -0
  61. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  62. data/spec/integration/gzip_file_spec.rb +62 -0
  63. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  64. data/spec/integration/outputs/sqs_spec.rb +98 -0
  65. data/spec/integration/restore_from_crash_spec.rb +67 -0
  66. data/spec/integration/s3_spec.rb +66 -0
  67. data/spec/integration/size_rotation_spec.rb +59 -0
  68. data/spec/integration/sqs_spec.rb +110 -0
  69. data/spec/integration/stress_test_spec.rb +60 -0
  70. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  71. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  72. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  73. data/spec/outputs/cloudwatch_spec.rb +38 -0
  74. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  75. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  76. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  77. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  78. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  79. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  80. data/spec/outputs/s3/uploader_spec.rb +69 -0
  81. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  82. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  83. data/spec/outputs/s3_spec.rb +232 -0
  84. data/spec/outputs/sns_spec.rb +160 -0
  85. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  86. data/spec/spec_helper.rb +8 -0
  87. data/spec/support/helpers.rb +119 -0
  88. data/spec/unit/outputs/sqs_spec.rb +247 -0
  89. metadata +467 -0
# encoding: utf-8
require "socket"
require "securerandom"
require "fileutils"
require "zlib"
require "forwardable"

module LogStash
  module Outputs
    class S3
      # Since the file can contain dynamic parts, we have to handle a more local structure to
      # allow a nice recovery from a crash.
      #
      # The local structure will look like this:
      #
      #   <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
      #
      # Since the UUID should be fairly unique I can destroy the whole path when an upload is complete.
      # I do not have to mess around to check if the other directory have file in it before destroying them.
      class TemporaryFileFactory
        FILE_MODE = "a"
        GZIP_ENCODING = "gzip"
        GZIP_EXTENSION = "txt.gz"
        TXT_EXTENSION = "txt"
        STRFTIME = "%Y-%m-%dT%H.%M"

        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current

        # @param prefix [String] key prefix prepended to every generated file name
        # @param tags [Array<String>] tags embedded into the generated file name
        # @param encoding [String] "gzip" to wrap the file in a gzip writer, anything else for plain text
        # @param temporary_directory [String] local root directory for temporary files
        def initialize(prefix, tags, encoding, temporary_directory)
          @counter = 0
          @prefix = prefix

          @tags = tags
          @encoding = encoding
          @temporary_directory = temporary_directory
          @lock = Mutex.new

          rotate!
        end

        # Atomically swaps in a fresh temporary file and bumps the part counter.
        # @return [TemporaryFile] the new current file
        def rotate!
          @lock.synchronize {
            @current = new_file
            increment_counter
            @current
          }
        end

        private
        def extension
          gzip? ? GZIP_EXTENSION : TXT_EXTENSION
        end

        def gzip?
          encoding == GZIP_ENCODING
        end

        def increment_counter
          @counter += 1
        end

        def current_time
          Time.now.strftime(STRFTIME)
        end

        # Builds the file name: "ls.s3.<uuid>.<time>[.tag_<tags>].part<n>.<ext>".
        # NOTE(review): the published text had both interpolations garbled as
        # "#(unknown)"; restored to "#{filename}" — the only plausible use of
        # the otherwise-unused local below.
        def generate_name
          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"

          if tags.size > 0
            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
          else
            "#{filename}.part#{counter}.#{extension}"
          end
        end

        # Creates the on-disk file under <temporary_directory>/<uuid>/<prefix>/
        # and wraps it in a TemporaryFile handle.
        def new_file
          uuid = SecureRandom.uuid
          name = generate_name
          path = ::File.join(temporary_directory, uuid)
          key = ::File.join(prefix, name)

          FileUtils.mkdir_p(::File.join(path, prefix))

          io = if gzip?
                 # We have to use this wrapper because we cannot access the size of the
                 # file directly on the gzip writer.
                 IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
               else
                 ::File.open(::File.join(path, key), FILE_MODE)
               end

          TemporaryFile.new(key, io, path)
        end

        # Adapter exposing a gzip stream with the file-like API
        # (#path, #size, #fsync) that the rotation policies expect.
        class IOWrappedGzip
          extend Forwardable

          def_delegators :@gzip_writer, :write, :close
          attr_reader :file_io, :gzip_writer

          def initialize(file_io)
            @file_io = file_io
            @gzip_writer = Zlib::GzipWriter.new(file_io)
          end

          def path
            @gzip_writer.to_io.path
          end

          def size
            # to get the current file size
            if @gzip_writer.pos == 0
              # Ensure a zero file size is returned when nothing has
              # yet been written to the gzip file.
              0
            else
              @gzip_writer.flush
              @gzip_writer.to_io.size
            end
          end

          def fsync
            @gzip_writer.to_io.fsync
          end
        end
      end
    end
  end
end
# encoding: utf-8
module LogStash
  module Outputs
    class S3
      # Rotation policy that triggers an upload once the current temporary
      # file is older than the configured number of minutes (and non-empty).
      class TimeRotationPolicy
        attr_reader :time_file

        # @param time_file [Numeric] rotation interval in minutes; must be > 0
        # @raise [LogStash::ConfigurationError] when time_file is not positive
        def initialize(time_file)
          if time_file <= 0
            # Fixed message grammar ("need to be greather" -> "needs to be greater").
            raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
          end

          # Stored internally in seconds for direct comparison with Time deltas.
          @time_file = time_file * 60
        end

        # @param file [#size, #ctime] the current temporary file
        # @return [true, false] true when the file has content and its age has
        #   reached the configured interval
        def rotate?(file)
          file.size > 0 && (Time.now - file.ctime) >= time_file
        end

        # Time-based rotation must be re-evaluated on a timer even when no new
        # events are being written.
        def needs_periodic?
          true
        end
      end
    end
  end
end
# encoding: utf-8
require "logstash/util"
require "aws-sdk-core"

module LogStash
  module Outputs
    class S3
      # Pushes finished temporary files to S3 on a background thread pool,
      # retrying transient failures with a fixed delay between attempts.
      class Uploader

        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
          :min_threads => 1,
          :max_threads => 8,
          :max_queue => 1,
          :fallback_policy => :caller_runs
        })

        attr_reader :bucket, :upload_options, :logger

        # @param bucket [Object] S3 bucket resource (responds to #object)
        # @param logger [Object] logger used for warn/error reporting
        # @param threadpool [Concurrent::ThreadPoolExecutor] pool running async uploads
        # @param retry_count [Numeric] maximum retries per file (default: unbounded)
        # @param retry_delay [Numeric] seconds to sleep between retries
        def initialize(bucket, logger, threadpool = DEFAULT_THREADPOOL, retry_count: Float::INFINITY, retry_delay: 1)
          @bucket = bucket
          @workers_pool = threadpool
          @logger = logger
          @retry_count = retry_count
          @retry_delay = retry_delay
        end

        # Schedules an upload on the worker pool and returns immediately.
        def upload_async(file, options = {})
          @workers_pool.post do
            LogStash::Util.set_thread_name("S3 output uploader, file: #{file.path}")
            upload(file, options)
          end
        end

        # Uploads one file synchronously, retrying on any error other than a
        # missing local file, then invokes the :on_complete callback if given.
        def upload(file, options = {})
          upload_options = options.fetch(:upload_options, {})
          tries = 0

          begin
            bucket.object(file.key).upload_file(file.path, upload_options)
          rescue Errno::ENOENT => e
            # The local file vanished before we could send it; retrying is pointless.
            logger.error("File doesn't exist! Unrecoverable error.", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
          rescue => e
            # When we get here it usually mean that S3 tried to do some retry by himself (default is 3)
            # When the retry limit is reached or another error happen we will wait and retry.
            #
            # Thread might be stuck here, but I think its better than losing anything
            # its either a transient errors or something bad really happened.
            if tries < @retry_count
              tries += 1
              logger.warn("Uploading failed, retrying (##{tries} of #{@retry_count})", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
              sleep @retry_delay
              retry
            else
              logger.error("Failed to upload file (retried #{@retry_count} times).", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
            end
          end

          callback = options[:on_complete]
          begin
            callback.call(file) if callback
          rescue => e
            logger.error("An error occurred in the `on_complete` uploader", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
            raise e # reraise it since we don't deal with it now
          end
        end

        # Drains the pool and blocks until all pending uploads have finished.
        def stop
          @workers_pool.shutdown
          @workers_pool.wait_for_termination(nil) # block until its done
        end
      end
    end
  end
end
# encoding: utf-8
module LogStash
  module Outputs
    class S3
      # Checks that a directory exists (creating it when necessary) and is writable.
      class WritableDirectoryValidator
        # @param path [String] directory to validate
        # @return [true, false] false when the directory cannot be created or
        #   is not writable by the current process
        def self.valid?(path)
          FileUtils.mkdir_p(path) unless Dir.exist?(path)
          ::File.writable?(path)
        rescue
          # Any failure (permissions, path under a regular file, ...) means "not usable".
          false
        end
      end
    end
  end
end
# encoding: utf-8
require "stud/temporary"
require "socket"
require "fileutils"

module LogStash
  module Outputs
    class S3
      # Verifies write access to the target bucket by uploading (and then
      # best-effort deleting) a small probe object.
      class WriteBucketPermissionValidator
        attr_reader :logger

        def initialize(logger)
          @logger = logger
        end

        # @param bucket_resource [Object] bucket resource (responds to #object)
        # @param upload_options [Hash] options forwarded to upload_file
        # @return [true, false] true when the probe upload succeeds
        def valid?(bucket_resource, upload_options = {})
          upload_test_file(bucket_resource, upload_options)
          true
        rescue StandardError => e
          logger.error("Error validating bucket write permissions!",
            :message => e.message,
            :class => e.class.name,
            :backtrace => e.backtrace
          )
          false
        end

        private
        # Writes a timestamped marker file locally, uploads it under a unique
        # key, then removes both the remote object and the local temp file.
        def upload_test_file(bucket_resource, upload_options = {})
          generated_at = Time.now

          key = "logstash-programmatic-access-test-object-#{generated_at}"
          content = "Logstash permission check on #{generated_at}, by #{Socket.gethostname}"

          temp_file = Stud::Temporary.file
          begin
            temp_file.write(content)
            temp_file.fsync

            remote_object = bucket_resource.object(key)
            remote_object.upload_file(temp_file, upload_options)

            begin
              remote_object.delete
            rescue
              # Try to remove the files on the remote bucket,
              # but don't raise any errors if that doesn't work.
              # since we only really need `putobject`.
            end
          ensure
            temp_file.close
            FileUtils.rm_rf(temp_file.path)
          end
        end
      end
    end
  end
end