logstash-integration-aws 0.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.PRE.MERGE.md +658 -0
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTORS +40 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +202 -0
  7. data/NOTICE.TXT +5 -0
  8. data/README.md +205 -0
  9. data/docs/codec-cloudfront.asciidoc +53 -0
  10. data/docs/codec-cloudtrail.asciidoc +45 -0
  11. data/docs/index.asciidoc +38 -0
  12. data/docs/input-cloudwatch.asciidoc +320 -0
  13. data/docs/input-s3.asciidoc +346 -0
  14. data/docs/input-sqs.asciidoc +287 -0
  15. data/docs/output-cloudwatch.asciidoc +321 -0
  16. data/docs/output-s3.asciidoc +442 -0
  17. data/docs/output-sns.asciidoc +166 -0
  18. data/docs/output-sqs.asciidoc +242 -0
  19. data/lib/logstash/codecs/cloudfront.rb +84 -0
  20. data/lib/logstash/codecs/cloudtrail.rb +47 -0
  21. data/lib/logstash/inputs/cloudwatch.rb +338 -0
  22. data/lib/logstash/inputs/s3.rb +466 -0
  23. data/lib/logstash/inputs/sqs.rb +196 -0
  24. data/lib/logstash/outputs/cloudwatch.rb +346 -0
  25. data/lib/logstash/outputs/s3/file_repository.rb +121 -0
  26. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  27. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  28. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  29. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  30. data/lib/logstash/outputs/s3/temporary_file_factory.rb +129 -0
  31. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  32. data/lib/logstash/outputs/s3/uploader.rb +74 -0
  33. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  34. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
  35. data/lib/logstash/outputs/s3.rb +405 -0
  36. data/lib/logstash/outputs/sns.rb +133 -0
  37. data/lib/logstash/outputs/sqs.rb +167 -0
  38. data/lib/logstash/plugin_mixins/aws_config/generic.rb +54 -0
  39. data/lib/logstash/plugin_mixins/aws_config/v2.rb +93 -0
  40. data/lib/logstash/plugin_mixins/aws_config.rb +8 -0
  41. data/logstash-integration-aws.gemspec +52 -0
  42. data/spec/codecs/cloudfront_spec.rb +92 -0
  43. data/spec/codecs/cloudtrail_spec.rb +56 -0
  44. data/spec/fixtures/aws_credentials_file_sample_test.yml +2 -0
  45. data/spec/fixtures/aws_temporary_credentials_file_sample_test.yml +3 -0
  46. data/spec/fixtures/cloudfront.log +4 -0
  47. data/spec/fixtures/compressed.log.gee.zip +0 -0
  48. data/spec/fixtures/compressed.log.gz +0 -0
  49. data/spec/fixtures/compressed.log.gzip +0 -0
  50. data/spec/fixtures/invalid_utf8.gbk.log +2 -0
  51. data/spec/fixtures/json.log +2 -0
  52. data/spec/fixtures/json_with_message.log +2 -0
  53. data/spec/fixtures/multiline.log +6 -0
  54. data/spec/fixtures/multiple_compressed_streams.gz +0 -0
  55. data/spec/fixtures/uncompressed.log +2 -0
  56. data/spec/inputs/cloudwatch_spec.rb +85 -0
  57. data/spec/inputs/s3_spec.rb +610 -0
  58. data/spec/inputs/sincedb_spec.rb +17 -0
  59. data/spec/inputs/sqs_spec.rb +324 -0
  60. data/spec/integration/cloudwatch_spec.rb +25 -0
  61. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  62. data/spec/integration/gzip_file_spec.rb +62 -0
  63. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  64. data/spec/integration/outputs/sqs_spec.rb +98 -0
  65. data/spec/integration/restore_from_crash_spec.rb +67 -0
  66. data/spec/integration/s3_spec.rb +66 -0
  67. data/spec/integration/size_rotation_spec.rb +59 -0
  68. data/spec/integration/sqs_spec.rb +110 -0
  69. data/spec/integration/stress_test_spec.rb +60 -0
  70. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  71. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  72. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  73. data/spec/outputs/cloudwatch_spec.rb +38 -0
  74. data/spec/outputs/s3/file_repository_spec.rb +143 -0
  75. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  76. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  77. data/spec/outputs/s3/temporary_file_factory_spec.rb +89 -0
  78. data/spec/outputs/s3/temporary_file_spec.rb +47 -0
  79. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  80. data/spec/outputs/s3/uploader_spec.rb +69 -0
  81. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  82. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +49 -0
  83. data/spec/outputs/s3_spec.rb +232 -0
  84. data/spec/outputs/sns_spec.rb +160 -0
  85. data/spec/plugin_mixin/aws_config_spec.rb +217 -0
  86. data/spec/spec_helper.rb +8 -0
  87. data/spec/support/helpers.rb +119 -0
  88. data/spec/unit/outputs/sqs_spec.rb +247 -0
  89. metadata +467 -0
@@ -0,0 +1,338 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/base"
3
+ require "logstash/namespace"
4
+ require "logstash/plugin_mixins/aws_config"
5
+ require "logstash/util"
6
+ require "stud/interval"
7
+ require "aws-sdk-cloudwatch"
8
+
9
+ # Pull events from the Amazon Web Services CloudWatch API.
10
+ #
11
+ # To use this plugin, you *must* have an AWS account, and the following policy.
12
+ #
13
+ # Typically, you should set up an IAM policy, create a user, and apply the IAM policy to the user.
14
+ #
15
+ # A sample policy for EC2 metrics is as follows:
16
+ #
17
+ # [source,json]
18
+ # {
19
+ # "Version": "2012-10-17",
20
+ # "Statement": [
21
+ # {
22
+ # "Sid": "Stmt1444715676000",
23
+ # "Effect": "Allow",
24
+ # "Action": [
25
+ # "cloudwatch:GetMetricStatistics",
26
+ # "cloudwatch:ListMetrics"
27
+ # ],
28
+ # "Resource": "*"
29
+ # },
30
+ # {
31
+ # "Sid": "Stmt1444716576170",
32
+ # "Effect": "Allow",
33
+ # "Action": [
34
+ # "ec2:DescribeInstances"
35
+ # ],
36
+ # "Resource": "*"
37
+ # }
38
+ # ]
39
+ # }
40
+ #
41
+ # See http://aws.amazon.com/iam/ for more details on setting up AWS identities.
42
+ #
43
+ # # Configuration Example
44
+ # [source, ruby]
45
+ # input {
46
+ # cloudwatch {
47
+ # namespace => "AWS/EC2"
48
+ # metrics => [ "CPUUtilization" ]
49
+ # filters => { "tag:Group" => "API-Production" }
50
+ # region => "us-east-1"
51
+ # }
52
+ # }
53
+ #
54
+ # input {
55
+ # cloudwatch {
56
+ # namespace => "AWS/EBS"
57
+ # metrics => ["VolumeQueueLength"]
58
+ # filters => { "tag:Monitoring" => "Yes" }
59
+ # region => "us-east-1"
60
+ # }
61
+ # }
62
+ #
63
+ # input {
64
+ # cloudwatch {
65
+ # namespace => "AWS/RDS"
66
+ # metrics => ["CPUUtilization", "CPUCreditUsage"]
67
+ # filters => { "EngineName" => "mysql" } # Only supports EngineName, DatabaseClass and DBInstanceIdentifier
68
+ # region => "us-east-1"
69
+ # }
70
+ # }
71
+ #
72
+
73
+ class LogStash::Inputs::CloudWatch < LogStash::Inputs::Base
74
+ include LogStash::PluginMixins::AwsConfig::V2
75
+
76
+ config_name "cloudwatch"
77
+
78
+ # If undefined, LogStash will complain, even if codec is unused.
79
+ default :codec, "plain"
80
+
81
+ # The service namespace of the metrics to fetch.
82
+ #
83
+ # The default is for the EC2 service.
84
+ #
85
+ # See http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/aws-namespaces.html
86
+ # for valid values.
87
+ config :namespace, :validate => :string, :default => 'AWS/EC2'
88
+
89
+ # Specify the metrics to fetch for the namespace. The defaults are AWS/EC2 specific.
90
+ #
91
+ # See http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/aws-namespaces.html
92
+ # for the available metrics for other namespaces.
93
+ config :metrics, :validate => :array, :default => [ 'CPUUtilization', 'DiskReadOps', 'DiskWriteOps', 'NetworkIn', 'NetworkOut' ]
94
+
95
+ # Specify the statistics to fetch for each namespace
96
+ config :statistics, :validate => :array, :default => [ 'SampleCount', 'Average', 'Minimum', 'Maximum', 'Sum' ]
97
+
98
+ # Set how frequently CloudWatch should be queried
99
+ #
100
+ # The default, `900`, means check every 15 minutes. Setting this value too low
101
+ # (generally less than 300) results in no metrics being returned from CloudWatch.
102
+ config :interval, :validate => :number, :default => (60 * 15)
103
+
104
+ # Set the granularity of the returned datapoints.
105
+ #
106
+ # Must be at least 60 seconds and in multiples of 60.
107
+ config :period, :validate => :number, :default => (60 * 5)
108
+
109
+ # Specify the filters to apply when fetching resources:
110
+ #
111
+ # Instances: { 'instance-id' => 'i-12344321' }
112
+ # Tags: { 'tag:Environment' => 'Production' }
113
+ # Volumes: { 'attachment.status' => 'attached' }
114
+ #
115
+ # This needs to follow the AWS convention of specifying filters.
116
+ #
117
+ # Each namespace uniquely supports certain dimensions. Consult the documentation
118
+ # to ensure you're using valid filters.
119
+ config :filters, :validate => :array
120
+
121
+ # Use this for namespaces that need to combine the dimensions like S3 and SNS.
122
+ config :combined, :validate => :boolean, :default => false
123
+
124
def aws_service_endpoint(region)
  # Only the region is handed back, wrapped in a single-key options hash.
  endpoint_options = {}
  endpoint_options[:region] = region
  endpoint_options
end
127
+
128
def register
  # Validate the polling configuration before the first poll.
  interval_covers_period = @interval >= @period
  raise 'Interval needs to be higher than period' unless interval_covers_period

  leftover = @interval % @period
  raise 'Interval must be divisible by period' unless leftover == 0

  # The namespace is only consulted when no filters were configured.
  if @filters.nil? && filters_required?(@namespace)
    raise "Filters must be defined for when using #{@namespace} namespace"
  end

  @last_check = Time.now
end # def register
135
+
136
def filters_required?(namespace)
  # 'AWS/EC2' is the only namespace that does not require filters.
  'AWS/EC2' != namespace
end
144
+
145
+ # Runs the poller to get metrics for the provided namespace
146
+ #
147
+ # @param queue [Array] Logstash queue
148
def run(queue)
  until stop?
    started_at = Time.now

    @logger.info('Polling CloudWatch API')

    raise 'No metrics to query' if metrics_for(@namespace).count.zero?

    # Poll each selected metric once per cycle.
    metrics_for(@namespace).each do |metric|
      @logger.debug "Polling metric #{metric}"
      @logger.debug "Filters: #{aws_filters}" unless @filters.nil?

      # Filters are sent as one combined dimension set only when both are configured.
      if !@filters.nil? && @combined
        from_filters(queue, metric)
      else
        from_resources(queue, metric)
      end
    end

    # Sleep for whatever is left of the interval after the time spent polling.
    remaining = @interval - (Time.now - started_at)
    Stud.stoppable_sleep(remaining) { stop? } if remaining > 0
  end # loop
end # def run
170
+
171
+ private
172
+
173
+ # Gets metrics from provided resources.
174
+ #
175
+ # @param queue [Array] Logstash queue
176
+ # @param metric [String] Metric name
177
def from_resources(queue, metric)
  resources.each_pair do |dimension, dim_resources|
    # A dimension may map to a single value or a list; splat it into a list either way.
    [*dim_resources].each do |resource|
      @logger.debug "Polling resource #{dimension}: #{resource}"

      query = metric_options(@namespace, metric)
      query[:dimensions] = [ { name: dimension, value: resource } ]

      response = clients['CloudWatch'].get_metric_statistics(query)
      @logger.debug "DPs: #{response.data}"

      # One event per datapoint, tagged with the query options and the resource.
      response[:datapoints].each do |datapoint|
        raw_event = datapoint.to_hash.merge!(query)
        raw_event[dimension.to_sym] = resource
        event = LogStash::Event.new(cleanup(raw_event))
        decorate(event)
        queue << event
      end
    end
  end
end
201
+
202
+ # Gets metrics from provided filter options
203
+ #
204
+ # @param queue [Array] Logstash queue
205
+ # @param metric [String] Metric name
206
def from_filters(queue, metric)
  query = metric_options(@namespace, metric)
  query[:dimensions] = aws_filters
  @logger.debug "Dim: #{query[:dimensions]}"

  response = clients['CloudWatch'].get_metric_statistics(query)
  @logger.debug "DPs: #{response.data}"

  # One event per datapoint, carrying the query options plus every filter as a field.
  response[:datapoints].each do |datapoint|
    raw_event = datapoint.to_hash.merge!(query)
    aws_filters.each { |dimension| raw_event[dimension[:name].to_sym] = dimension[:value] }

    event = LogStash::Event.new(cleanup(raw_event))
    decorate(event)
    queue << event
  end
end
225
+
226
+ # Cleans up an event to remove unneeded fields and format time
227
+ #
228
+ # @param event [Hash] Raw event
229
+ #
230
+ # @return [Hash] Cleaned event
231
def cleanup(event)
  # Drop the query-only entries from the event hash.
  [:statistics, :dimensions].each { |key| event.delete(key) }

  # Re-parse both time bounds and store them as UTC Time objects.
  [:start_time, :end_time].each do |key|
    event[key] = Time.parse(event[key]).utc
  end

  LogStash::Util.stringify_symbols(event)
end
238
+
239
+ # Dynamic AWS client instantiator for retrieving the proper client
240
+ # for the provided namespace
241
+ #
242
+ # @return [Hash]
243
def clients
  @clients ||= Hash.new do |client_hash, namespace|
    # Map a namespace such as 'AWS/EC2' to the SDK module name ('EC2').
    service = namespace.start_with?('AWS/') ? namespace[4..-1] : namespace
    # EBS namespaces are served by the EC2 client.
    service = 'EC2' if service == 'EBS'

    unless client_hash.key?(service)
      # TODO: Move logger configuration into mixin.
      client_hash[service] = Aws.const_get(service)::Client.new(aws_options_hash.merge(:logger => @logger))
    end

    # Cache under the requested key as well. This block only runs for missing
    # keys, so storing just the normalized name would rebuild the client on
    # every 'AWS/...' lookup.
    client_hash[namespace] = client_hash.fetch(service)
  end
end
252
+
253
+ # Gets metrics for a provided namespace based on the intersection of available and
254
+ # configured metrics
255
+ #
256
+ # @param namespace [String] Namespace to retrieve metrics for
257
+ #
258
+ # @return [Array<String>]
259
def metrics_for(namespace)
  # Array#& keeps the available metric names that are also configured, without duplicates.
  known = metrics_available[namespace]
  known & @metrics
end
262
+
263
+ # Gets available metrics for a given namespace
264
+ #
265
+ # @return [Hash]
266
def metrics_available
  @metrics_available ||= Hash.new do |metrics_hash, namespace|
    listing = clients['CloudWatch'].list_metrics({ namespace: namespace })

    # NOTE(review): only the metrics on this response object are read; confirm
    # whether additional result pages need to be fetched for large accounts.
    names = listing[:metrics].map { |metric| metric[:metric_name] }

    # Assign only after the listing succeeded, so a failed request does not
    # leave an empty list cached for the namespace.
    metrics_hash[namespace] = names
  end
end
276
+
277
+ # Gets options for querying against Cloudwatch for a given metric and namespace
278
+ #
279
+ # @param namespace [String] Namespace to query in
280
+ # @param metric [String] Metric to query for
281
+ #
282
+ # @return [Hash]
283
def metric_options(namespace, metric)
  options = { namespace: namespace, metric_name: metric }
  # The query window covers the last @interval seconds, ending now.
  options[:start_time] = (Time.now - @interval).iso8601
  options[:end_time] = Time.now.iso8601
  options[:period] = @period
  options[:statistics] = @statistics
  options
end
293
+
294
+ # Filters used in querying the AWS SDK for resources
295
+ #
296
+ # @return [Array]
297
def aws_filters
  # Combined mode passes each filter value through untouched under :value.
  return @filters.map { |key, value| { name: key, value: value } } if @combined

  # Otherwise each filter carries a list under :values; wrap a single value in an array.
  @filters.map do |key, value|
    { name: key, values: (value.is_a?(Array) ? value : [value]) }
  end
end
307
+
308
+ # Gets resources based on the provided namespace
309
+ #
310
+ # @see http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/CW_Support_For_AWS.html
311
+ #
312
+ # @return [Hash] dimension name => resource identifier(s)
313
def resources
  case @namespace
  when 'AWS/EC2'
    # Collect the instance id of every instance across all reservations.
    reservations = clients[@namespace].describe_instances(filter_options)[:reservations]
    instance_ids = reservations.flat_map do |reservation|
      reservation[:instances].map { |instance| instance[:instance_id] }
    end

    { 'InstanceId' => instance_ids }
  when 'AWS/EBS'
    # Volume ids are read from each volume's attachment records.
    described = clients[@namespace].describe_volumes(filters: aws_filters)[:volumes]
    volume_ids = described.flat_map do |volume|
      volume[:attachments].map { |attachment| attachment[:volume_id] }
    end

    @logger.debug "AWS/EBS Volumes: #{volume_ids}"

    { 'VolumeId' => volume_ids }
  else
    # Other namespaces use the configured filters directly as the resource map.
    @filters
  end
end
333
+
334
def filter_options
  # With no filters configured, an empty options hash is returned.
  return {} if @filters.nil?

  { :filters => aws_filters }
end
337
+
338
+ end # class LogStash::Inputs::CloudWatch