logstash-integration-aws 0.1.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/docs/codec-cloudfront.asciidoc +53 -0
  10. data/docs/codec-cloudtrail.asciidoc +45 -0
  11. data/docs/index.asciidoc +38 -0
  12. data/docs/input-cloudwatch.asciidoc +320 -0
  13. data/docs/input-s3.asciidoc +346 -0
  14. data/docs/input-sqs.asciidoc +287 -0
  15. data/docs/output-cloudwatch.asciidoc +321 -0
  16. data/docs/output-s3.asciidoc +442 -0
  17. data/docs/output-sns.asciidoc +166 -0
  18. data/docs/output-sqs.asciidoc +242 -0
  19. data/lib/logstash/codecs/cloudfront.rb +84 -0
  20. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  21. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  22. data/lib/logstash/inputs/s3.rb +466 -0
  23. data/lib/logstash/inputs/sqs.rb +196 -0
  24. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  25. data/lib/logstash/outputs/s3/file_repository.rb +121 -0
  26. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  27. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  28. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  29. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  30. data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
  31. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  32. data/lib/logstash/outputs/s3/uploader.rb +74 -0
  33. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  34. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  35. data/lib/logstash/outputs/s3.rb +405 -0
  36. data/lib/logstash/outputs/sns.rb +133 -0
  37. data/lib/logstash/outputs/sqs.rb +167 -0
  38. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  39. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  40. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  41. data/logstash-integration-aws.gemspec +52 -0
  42. data/spec/codecs/cloudfront_spec.rb +92 -0
  43. data/spec/codecs/cloudtrail_spec.rb +56 -0
  44. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  45. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  46. data/spec/fixtures/cloudfront.log +4 -0
  47. data/spec/fixtures/compressed.log.gee.zip +0 -0
  48. data/spec/fixtures/compressed.log.gz +0 -0
  49. data/spec/fixtures/compressed.log.gzip +0 -0
  50. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  51. data/spec/fixtures/json.log +2 -0
  52. data/spec/fixtures/json_with_message.log +2 -0
  53. data/spec/fixtures/multiline.log +6 -0
  54. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  55. data/spec/fixtures/uncompressed.log +2 -0
  56. data/spec/inputs/cloudwatch_spec.rb +85 -0
  57. data/spec/inputs/s3_spec.rb +610 -0
  58. data/spec/inputs/sincedb_spec.rb +17 -0
  59. data/spec/inputs/sqs_spec.rb +324 -0
  60. data/spec/integration/cloudwatch_spec.rb +25 -0
  61. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  62. data/spec/integration/gzip_file_spec.rb +62 -0
  63. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  64. data/spec/integration/outputs/sqs_spec.rb +98 -0
  65. data/spec/integration/restore_from_crash_spec.rb +67 -0
  66. data/spec/integration/s3_spec.rb +66 -0
  67. data/spec/integration/size_rotation_spec.rb +59 -0
  68. data/spec/integration/sqs_spec.rb +110 -0
  69. data/spec/integration/stress_test_spec.rb +60 -0
  70. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  71. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  72. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  73. data/spec/outputs/cloudwatch_spec.rb +38 -0
  74. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  75. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  76. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  77. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  78. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  79. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  80. data/spec/outputs/s3/uploader_spec.rb +69 -0
  81. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  82. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  83. data/spec/outputs/s3_spec.rb +232 -0
  84. data/spec/outputs/sns_spec.rb +160 -0
  85. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  86. data/spec/spec_helper.rb +8 -0
  87. data/spec/support/helpers.rb +119 -0
  88. data/spec/unit/outputs/sqs_spec.rb +247 -0
  89. metadata +467 -0
@@ -0,0 +1,338 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/base"
3
+ require "logstash/namespace"
4
+ require "logstash/plugin_mixins/aws_config"
5
+ require "logstash/util"
6
+ require "stud/interval"
7
+ require "aws-sdk-cloudwatch"
8
+
9
+ # Pull events from the Amazon Web Services CloudWatch API.
10
+ #
11
+ # To use this plugin, you *must* have an AWS account, and the following policy.
12
+ #
13
+ # Typically, you should set up an IAM policy, create a user and apply the IAM policy to the user.
14
+ #
15
+ # A sample policy for EC2 metrics is as follows:
16
+ #
17
+ # [source,json]
18
+ # {
19
+ # "Version": "2012-10-17",
20
+ # "Statement": [
21
+ # {
22
+ # "Sid": "Stmt1444715676000",
23
+ # "Effect": "Allow",
24
+ # "Action": [
25
+ # "cloudwatch:GetMetricStatistics",
26
+ # "cloudwatch:ListMetrics"
27
+ # ],
28
+ # "Resource": "*"
29
+ # },
30
+ # {
31
+ # "Sid": "Stmt1444716576170",
32
+ # "Effect": "Allow",
33
+ # "Action": [
34
+ # "ec2:DescribeInstances"
35
+ # ],
36
+ # "Resource": "*"
37
+ # }
38
+ # ]
39
+ # }
40
+ #
41
+ # See http://aws.amazon.com/iam/ for more details on setting up AWS identities.
42
+ #
43
+ # # Configuration Example
44
+ # [source, ruby]
45
+ # input {
46
+ # cloudwatch {
47
+ # namespace => "AWS/EC2"
48
+ # metrics => [ "CPUUtilization" ]
49
+ # filters => { "tag:Group" => "API-Production" }
50
+ # region => "us-east-1"
51
+ # }
52
+ # }
53
+ #
54
+ # input {
55
+ # cloudwatch {
56
+ # namespace => "AWS/EBS"
57
+ # metrics => ["VolumeQueueLength"]
58
+ # filters => { "tag:Monitoring" => "Yes" }
59
+ # region => "us-east-1"
60
+ # }
61
+ # }
62
+ #
63
+ # input {
64
+ # cloudwatch {
65
+ # namespace => "AWS/RDS"
66
+ # metrics => ["CPUUtilization", "CPUCreditUsage"]
67
+ # filters => { "EngineName" => "mysql" } # Only supports EngineName, DatabaseClass and DBInstanceIdentifier
68
+ # region => "us-east-1"
69
+ # }
70
+ # }
71
+ #
72
+
73
+ class LogStash::Inputs::CloudWatch < LogStash::Inputs::Base
74
+ include LogStash::PluginMixins::AwsConfig::V2
75
+
76
+ config_name "cloudwatch"
77
+
78
+ # If undefined, LogStash will complain, even if codec is unused.
79
+ default :codec, "plain"
80
+
81
+ # The service namespace of the metrics to fetch.
82
+ #
83
+ # The default is for the EC2 service.
84
+ #
85
+ # See http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/aws-namespaces.html
86
+ # for valid values.
87
+ config :namespace, :validate => :string, :default => 'AWS/EC2'
88
+
89
+ # Specify the metrics to fetch for the namespace. The defaults are AWS/EC2 specific.
90
+ #
91
+ # See http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/aws-namespaces.html
92
+ # for the available metrics for other namespaces.
93
+ config :metrics, :validate => :array, :default => [ 'CPUUtilization', 'DiskReadOps', 'DiskWriteOps', 'NetworkIn', 'NetworkOut' ]
94
+
95
+ # Specify the statistics to fetch for each namespace
96
+ config :statistics, :validate => :array, :default => [ 'SampleCount', 'Average', 'Minimum', 'Maximum', 'Sum' ]
97
+
98
+ # Set how frequently CloudWatch should be queried
99
+ #
100
+ # The default, `900`, means check every 15 minutes. Setting this value too low
101
+ # (generally less than 300) results in no metrics being returned from CloudWatch.
102
+ config :interval, :validate => :number, :default => (60 * 15)
103
+
104
+ # Set the granularity of the returned datapoints.
105
+ #
106
+ # Must be at least 60 seconds and in multiples of 60.
107
+ config :period, :validate => :number, :default => (60 * 5)
108
+
109
+ # Specify the filters to apply when fetching resources:
110
+ #
111
+ # Instances: { 'instance-id' => 'i-12344321' }
112
+ # Tags: { 'tag:Environment' => 'Production' }
113
+ # Volumes: { 'attachment.status' => 'attached' }
114
+ #
115
+ # This needs to follow the AWS convention of specifying filters.
116
+ #
117
+ # Each namespace uniquely supports certain dimensions. Consult the documentation
118
+ # to ensure you're using valid filters.
119
+ config :filters, :validate => :array
120
+
121
+ # Use this for namespaces that need to combine the dimensions like S3 and SNS.
122
+ config :combined, :validate => :boolean, :default => false
123
+
124
# Wraps the given region in an options hash.
#
# @param region [String] AWS region name
#
# @return [Hash] a single-entry hash keyed by :region
def aws_service_endpoint(region)
  endpoint = {}
  endpoint[:region] = region
  endpoint
end
127
+
128
# Validates the polling configuration and records the time of registration.
#
# @raise [RuntimeError] when the interval is shorter than the period, when the
#   interval is not a multiple of the period, or when no filters are set for
#   a namespace that `filters_required?` reports as needing them
def register
  interval_covers_period = @interval >= @period
  raise 'Interval needs to be higher than period' unless interval_covers_period

  leftover = @interval % @period
  raise 'Interval must be divisible by period' unless leftover == 0

  if @filters.nil? && filters_required?(@namespace)
    raise "Filters must be defined for when using #{@namespace} namespace"
  end

  @last_check = Time.now
end # def register
135
+
136
# Tells whether a namespace may only be polled when filters are configured.
#
# @param namespace [String] CloudWatch namespace
#
# @return [Boolean] false for 'AWS/EC2', true for every other value
def filters_required?(namespace)
  'AWS/EC2' != namespace
end
144
+
145
# Polls CloudWatch for the configured namespace until the plugin is stopped.
#
# Each pass walks the metrics returned by `metrics_for`, dispatches every
# metric to `from_filters` (filters set and `combined` enabled) or to
# `from_resources` (all other cases), then sleeps for whatever is left of
# the configured interval.
#
# @param queue [Array] Logstash queue
#
# @raise [RuntimeError] when `metrics_for` returns no metrics
def run(queue)
  until stop?
    started_at = Time.now

    @logger.info('Polling CloudWatch API')

    raise 'No metrics to query' unless metrics_for(@namespace).count > 0

    metrics_for(@namespace).each do |metric|
      @logger.debug "Polling metric #{metric}"
      @logger.debug "Filters: #{aws_filters}" unless @filters.nil?

      if !@filters.nil? && @combined
        from_filters(queue, metric)
      else
        from_resources(queue, metric)
      end
    end

    # Only the remainder of the interval is slept, so a slow pass shortens the pause.
    remaining = @interval - (Time.now - started_at)
    if remaining > 0
      Stud.stoppable_sleep(remaining) { stop? }
    end
  end # loop
end # def run
170
+
171
+ private
172
+
173
# Queries CloudWatch once per resource and enqueues one event per datapoint.
#
# `resources` supplies a mapping of dimension name to one or many resource
# identifiers. Each identifier is sent as a single-dimension query; every
# returned datapoint is merged with the query options, tagged with the
# dimension/resource pair, cleaned up and pushed onto the queue.
#
# @param queue [Array] Logstash queue
# @param metric [String] Metric name
def from_resources(queue, metric)
  resources.each_pair do |dimension, dim_resources|
    # A scalar value is treated as a one-element list.
    [*dim_resources].each do |resource|
      @logger.debug "Polling resource #{dimension}: #{resource}"

      query = metric_options(@namespace, metric)
      query[:dimensions] = [ { name: dimension, value: resource } ]

      response = clients['CloudWatch'].get_metric_statistics(query)
      @logger.debug "DPs: #{response.data}"

      response[:datapoints].each do |datapoint|
        fields = datapoint.to_hash.update(query)
        fields[dimension.to_sym] = resource

        event = LogStash::Event.new(cleanup(fields))
        decorate(event)
        queue << event
      end
    end
  end
end
201
+
202
# Queries CloudWatch once with all configured filters as dimensions and
# enqueues one event per returned datapoint.
#
# Every datapoint is merged with the query options and then receives one
# field per filter (named after the filter, holding its value) before being
# cleaned up and pushed onto the queue.
#
# @param queue [Array] Logstash queue
# @param metric [String] Metric name
def from_filters(queue, metric)
  query = metric_options(@namespace, metric)
  query[:dimensions] = aws_filters
  @logger.debug "Dim: #{query[:dimensions]}"

  response = clients['CloudWatch'].get_metric_statistics(query)
  @logger.debug "DPs: #{response.data}"

  response[:datapoints].each do |datapoint|
    fields = datapoint.to_hash.update(query)
    aws_filters.each { |dimension| fields[dimension[:name].to_sym] = dimension[:value] }

    event = LogStash::Event.new(cleanup(fields))
    decorate(event)
    queue << event
  end
end
225
+
226
# Strips query-only fields from a raw event hash and normalises its time range.
#
# The :statistics and :dimensions entries are removed, :start_time and
# :end_time are parsed and converted to UTC, and the hash is then passed
# through `LogStash::Util.stringify_symbols`. The hash given is modified in
# place before that final call.
#
# @param event [Hash] Raw event
#
# @return [Hash] Cleaned event
def cleanup(event)
  %i[statistics dimensions].each { |unneeded| event.delete(unneeded) }
  %i[start_time end_time].each do |boundary|
    event[boundary] = Time.parse(event[boundary]).utc
  end
  LogStash::Util.stringify_symbols(event)
end
238
+
239
# Lazily builds and caches AWS service clients.
#
# The requested key may be a bare service name ('CloudWatch') or a
# CloudWatch namespace ('AWS/EC2'). A leading 'AWS/' is stripped, 'EBS' is
# mapped to 'EC2', and the resulting name is resolved to a constant under
# `Aws` whose `Client` class is instantiated with `aws_options_hash` plus
# the plugin logger.
#
# The client is cached under both the resolved service name and the key
# that was requested. Caching under the service name alone would make every
# lookup by namespace miss the cache and build a new client.
#
# @return [Hash] hash whose default block builds and stores the client
def clients
  @clients ||= Hash.new do |client_hash, requested|
    service = requested[0..3] == 'AWS/' ? requested[4..-1] : requested
    service = 'EC2' if service == 'EBS'

    # key? avoids re-entering this default block for the service name.
    unless client_hash.key?(service)
      # TODO: Move logger configuration into mixin.
      client_hash[service] = Aws.const_get(service)::Client.new(aws_options_hash.merge(:logger => @logger))
    end

    client_hash[requested] = client_hash.fetch(service)
  end
end
252
+
253
# Lists the configured metrics that are actually available in a namespace,
# i.e. the intersection of `metrics_available[namespace]` and the configured
# `metrics` option.
#
# @param namespace [String] Namespace to retrieve metrics for
#
# @return [Array] metric names present in both lists, without duplicates
def metrics_for(namespace)
  available = metrics_available[namespace]
  available & @metrics
end
262
+
263
# Lazily lists and caches the metric names CloudWatch reports per namespace.
#
# Looking up a namespace that is not cached yet calls `list_metrics` on the
# 'CloudWatch' client. When the response supports `each_page`, every page is
# read; otherwise the response is treated as a single page. The result is
# stored only after the listing succeeded, so a failed API call does not
# leave an empty list cached for the namespace.
#
# @return [Hash] namespace => Array of metric names (may contain duplicates)
def metrics_available
  @metrics_available ||= Hash.new do |metrics_hash, namespace|
    response = clients['CloudWatch'].list_metrics({ namespace: namespace })

    pages = []
    if response.respond_to?(:each_page)
      response.each_page { |page| pages << page }
    else
      pages << response
    end

    metrics_hash[namespace] = pages.flat_map do |page|
      page[:metrics].map { |listed| listed[:metric_name] }
    end
  end
end
276
+
277
# Builds the options for a CloudWatch statistics query for one metric.
#
# The query window ends at the current time and starts `@interval` seconds
# earlier. The clock is read once so that both boundaries derive from the
# same instant and the window is exactly `@interval` seconds long.
#
# @param namespace [String] Namespace to query in
# @param metric [String] Metric to query for
#
# @return [Hash] query options with ISO 8601 :start_time and :end_time
def metric_options(namespace, metric)
  now = Time.now
  {
    namespace: namespace,
    metric_name: metric,
    start_time: (now - @interval).iso8601,
    end_time: now.iso8601,
    period: @period,
    statistics: @statistics
  }
end
293
+
294
# Converts the configured filters into the list-of-hashes form passed to AWS.
#
# With `combined` enabled each filter becomes `{ name:, value: }`; otherwise
# it becomes `{ name:, values: }` where a non-array value is wrapped in a
# one-element array.
#
# @return [Array] one hash per configured filter
def aws_filters
  if @combined
    return @filters.collect { |key, value| { name: key, value: value } }
  end

  @filters.collect do |key, value|
    { name: key, values: value.is_a?(Array) ? value : [value] }
  end
end
307
+
308
# Resolves the resources to poll for the configured namespace.
#
# For 'AWS/EC2' the instance ids found via `describe_instances` are returned
# under 'InstanceId'. For 'AWS/EBS' the volume ids found in the attachments
# of `describe_volumes` are returned under 'VolumeId'. Any other namespace
# returns the configured filters unchanged.
#
# @see http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/CW_Support_For_AWS.html
#
# @return [Hash] dimension name => resource ids, or the configured filters
def resources
  case @namespace
  when 'AWS/EC2'
    reservations = clients[@namespace].describe_instances(filter_options)[:reservations]
    instance_ids = reservations.flat_map do |reservation|
      reservation[:instances].map { |instance| instance[:instance_id] }
    end

    { 'InstanceId' => instance_ids }
  when 'AWS/EBS'
    described = clients[@namespace].describe_volumes(filters: aws_filters)[:volumes]
    volume_ids = described.flat_map do |volume|
      volume[:attachments].map { |attachment| attachment[:volume_id] }
    end

    @logger.debug "AWS/EBS Volumes: #{volume_ids}"

    { 'VolumeId' => volume_ids }
  else
    @filters
  end
end
333
+
334
# Builds the options hash carrying the configured filters for an AWS call.
#
# @return [Hash] empty when no filters are configured, otherwise a hash with
#   the result of `aws_filters` under :filters
def filter_options
  return {} if @filters.nil?

  { :filters => aws_filters }
end
337
+
338
+ end # class LogStash::Inputs::CloudWatch